Python 爬取京東商品資訊

2021-10-09 16:21:40 字數 3223 閱讀 4102

# -*- coding: utf-8 -*-

import os

import re

import time

from urllib.parse import urlencode

import requests

from lxml import etree

import pymysql

from time import strftime, gmtime

# 方法二,從本地資料夾獲取

# 將資料存入mysql中

def data_import(title, img, price, category):
    """Insert one scraped product row into the ``jd_material`` MySQL table.

    A new connection is opened on every call and is always closed before
    returning. There is no ``except`` clause, so any error raised while
    executing or committing propagates to the caller.

    Args:
        title: Value stored in the ``title`` column.
        img: Value stored in the ``pic_url`` column.
        price: Value stored in the ``price`` column.
        category: Value stored in the ``category`` column.
    """
    # Timestamp in UTC (gmtime), formatted as a MySQL DATETIME literal.
    # The directives are case-sensitive: %Y/%H/%M/%S, not %y/%h/%m/%s.
    riqi = strftime("%Y-%m-%d %H:%M:%S", gmtime())

    # NOTE(review): credentials are hard-coded in source; consider loading
    # them from configuration or the environment instead.
    connection = pymysql.connect(
        host='192.168.2.176',
        user='marketing',
        password='an**5vvylgnxrom0',
        db='marketing_api',
        charset='utf8',
    )
    try:
        with connection.cursor() as cursor:
            # `id` is deliberately left out so that AUTO_INCREMENT assigns
            # it; sending '' for an INT column is rejected in strict mode.
            sql = (
                "insert into `jd_material`"
                "(`title`, `pic_url`, `price`, `sales`, `create_time`, `category`) "
                "values (%s,%s,%s,%s,%s,%s);"
            )
            # `sales` is always written as 0 by this function.
            cursor.execute(sql, (title, img, price, 0, riqi, category))
        connection.commit()
    finally:
        connection.close()

if __name__ == '__main__':
    # Script entry point: for every keyword in goods_list, call get_html()
    # once per page index, pausing between requests.
    #
    # NOTE(review): get_html is not defined in the visible part of this
    # file; it must be provided elsewhere before this script can run.
    #
    # Categories that are currently commented out of the run:
    # "家用電器", "手機", "運營商", "數碼", "電腦", "辦公", "家居", "家具", "家裝", "廚具", "**", "**", "童裝", "內衣", "美妝", "個護清潔", "寵物", "女鞋", "箱包", "鐘錶", "珠寶", "男鞋", "運動", "戶外", "房產", "汽車", "汽車用品",
    # "母嬰", "玩具樂器", "食品", "酒類", "生鮮", "特產",
    goods_list = ["禮品鮮花", "農資綠植", "醫藥保健", "計生情趣", "圖書", "文娛", "教育", "電子書", "機票", "酒店", "旅遊", "生活", "理財", "眾籌", "白條", "保險", "安裝", "維修", "清洗", "二手", "工業品"]

    # Number of pages to crawl per keyword (page indices 0 .. num - 1).
    num = 10

    for keywords in goods_list:
        for page in range(num):
            get_html(keywords, page)
            print('---------------------------')
            time.sleep(2)
            print('第%s頁結束' % page)
            # NOTE(review): the original indentation was lost; this second
            # pause is assumed to belong to the per-page loop.
            time.sleep(2)

cookie資訊和referer資訊

表sql

-- Table holding one row per scraped product.
-- `create_time` defaults to the insertion time: the former zero-date
-- default ('0000-00-00 00:00:01') is rejected as an invalid default when
-- the NO_ZERO_DATE / NO_ZERO_IN_DATE / strict SQL modes are enabled.
-- DATETIME ... DEFAULT CURRENT_TIMESTAMP requires MySQL 5.6.5 or later.
create table `jd_material` (
  `id` int(11) not null auto_increment comment '自增主鍵',
  `title` varchar(255) default '' comment '商品名稱',
  `pic_url` varchar(255) default '' comment '位址',
  -- NOTE(review): this column comment appears to have been masked ('**').
  `price` varchar(10) default null comment '**',
  `sales` int(11) default 0 comment '銷量',
  `create_time` datetime default current_timestamp comment '建立時間',
  `category` varchar(255) default '' comment '類目',
  primary key (`id`) using btree
) engine=innodb auto_increment=1 default charset=utf8;

爬取京東商品資訊

爬取京東商品資訊 from selenium import webdriver from selenium.webdriver import ChromeOptions from selenium.webdriver import ActionChains from selenium.webdriv...

京東app商品資訊爬取

準備工作 配置網路,確認手機和pc處於同一區域網下,並配置好 服務 安裝證書,確保可以抓取https的請求資訊。安裝並開啟mongodb資料庫。抓取分析 抓取資訊格式為json格式。具體如下圖所示 連線mongodb資料庫jddb,選擇集合shop client pymongo.mongoclien...

爬取京東商城商品資訊

from selenium import webdriver from selenium.webdriver import ActionChains 獲取屬性 from selenium.webdriver.common.keys import Keys from selenium.webdrive...