爬取京東商品資訊

2022-05-15 16:03:09 字數 2113 閱讀 4784

爬取京東商品資訊

import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Build the Chrome session shared by the rest of the script.
# Selenium class names are case-sensitive: `ChromeOptions` / `webdriver.Chrome`.
option = webdriver.ChromeOptions()
option.add_argument('disable-infobars')

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=option)

# One text record per product; exactly five placeholders, matching the five
# values collected in `_describe_good`.
_GOOD_TEMPLATE = (
    '\n'
    '商品名稱: %s\n'
    '商品價格: %s\n'
    '商品鏈結: %s\n'
    '商品圖片: %s\n'
    '評價人數: %s\n'
)


def _scroll_through_page(driver):
    """Scroll the window down in 20 steps of 500px, pausing 0.2s between steps.

    Presumably this gives lazily loaded products time to render before they
    are queried -- confirm against the live page.
    """
    offset = 400
    for _ in range(20):
        # JavaScript is case-sensitive: the DOM method is `scrollTo`.
        driver.execute_script('window.scrollTo(0, %s)' % offset)
        offset += 500
        time.sleep(0.2)


def _describe_good(good):
    """Return the formatted text record for a single product element."""
    # Product name
    good_name = good.find_element(By.CSS_SELECTOR, '.p-name em').text.replace('\n', '')
    # Product price
    good_price = good.find_element(By.CSS_SELECTOR, '.p-price').text.replace('\n', '')
    # Product link
    good_link = good.find_element(By.CSS_SELECTOR, '.p-img a').get_attribute('href')
    # Product image
    good_img = good.find_element(By.CSS_SELECTOR, '.p-img img').get_attribute('src')
    # Number of reviews (line breaks become spaces to keep the parts separated)
    good_commit = good.find_element(By.CSS_SELECTOR, '.p-commit').text.replace('\n', ' ')
    return _GOOD_TEMPLATE % (good_name, good_price, good_link, good_img, good_commit)


def get_goods(driver):
    """Scrape the products on the current results page and every following page.

    Each product record is printed and appended to
    '京東女士內衣資料爬去.txt' (UTF-8). Pages are walked iteratively by
    clicking the "next page" control until it is missing or disabled, so a
    long result list cannot exhaust the recursion limit.

    Args:
        driver: a Selenium WebDriver already showing a search results page.
    """
    with open('京東女士內衣資料爬去.txt', 'a', encoding='utf-8') as f:
        while True:
            _scroll_through_page(driver)

            # Parent element holding all products. Element ids are
            # case-sensitive; JD's results container is `J_goodsList`.
            good_div = driver.find_element(By.ID, 'J_goodsList')
            # Every product entry on this page.
            good_list = good_div.find_elements(By.CLASS_NAME, 'gl-item')

            for good in good_list:
                goods = _describe_good(good)
                print(goods)
                f.write(goods + '\n')
            # Persist each page as soon as it is done; the crawl is long-running.
            f.flush()

            # `find_elements` returns an empty list instead of raising when
            # there is no "next page" control.
            next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
            if not next_tags:
                break
            next_tag = next_tags[0]
            # NOTE(review): JD appears to add a `disabled` class to the control
            # on the last page -- confirm against the live markup.
            if 'disabled' in (next_tag.get_attribute('class') or ''):
                break

            next_tag.click()
            # Give the next page time to load before scraping it.
            time.sleep(3)

# Entry point: open JD, search for the keyword, then scrape every result page.
try:
    # The URL was empty in the original; the `key` search box and `button`
    # lookups below match the JD home page.
    driver.get('https://www.jd.com/')
    # Let element lookups wait up to 10 seconds for elements to appear.
    driver.implicitly_wait(10)

    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('女士內衣')

    search_button = driver.find_element(By.CLASS_NAME, 'button')
    search_button.click()

    get_goods(driver)

    # Keep the browser open for a while after scraping finishes.
    time.sleep(1000)
finally:
    # `quit()` ends the whole session, including the chromedriver process;
    # `close()` would only close the current window.
    driver.quit()

python 爬取京東商品資訊

coding utf 8 import os import re import time from urllib.parse import urlencode import requests from lxml import etree import pymysql from time import...

京東app商品資訊爬取

準備工作 配置網路,確認手機和pc處於同一區域網下,並配置好 服務 安裝證書,確保可以抓取https的請求資訊。安裝並開啟mongodb資料庫。抓取分析 抓取資訊格式為json格式。具體如下圖所示 連線mongodb資料庫jddb,選擇集合shop client pymongo.mongoclien...

爬取京東商城商品資訊

from selenium import webdriver from selenium.webdriver import actionchains 獲取屬性 from selenium.webdriver.common.keys import keys from selenium.webdrive...