京東評論爬取並寫入sqlite資料庫

import json
import requests
import sqlite3
# Crawl JD product comments page by page and store them in a local SQLite
# database, then print every stored row.

# Endpoint of the comment API.
# TODO: the URL literal is empty in the source text; fill in the real endpoint.
base_url = ''

# Request headers. Extra header fields may need to be forged so the request
# looks like it comes from a browser.
# TODO: the header values are missing from the source text; supply them.
headers = {}

# Query-string parameters; 'page' is overwritten on every loop iteration.
# TODO: the remaining parameters are missing from the source text; supply them.
params = {}

FIRST_PAGE = 1
LAST_PAGE_EXCLUSIVE = 60
# Without a timeout a stalled connection would hang the crawl forever.
REQUEST_TIMEOUT_SECONDS = 10

CREATE_TABLE_SQL = """ create table if not exists comment(
cid integer primary key,
content text,
creation_time text,
product_color text,
product_size text
);"""

INSERT_SQL = (
    """insert or ignore into comment (cid, content, creation_time, product_color, product_size) values (?,?,?,?,?);"""
)


def _field(record, *names):
    """Return the value of the first key in *names* found in *record*, else None."""
    for name in names:
        if name in record:
            return record[name]
    return None


# Connect to the database; the file is created automatically if missing.
connect = sqlite3.connect('./jingdongsqlite.db')
# The cursor is the handle used to execute SQL on this connection.
cursor = connect.cursor()
try:
    cursor.execute(CREATE_TABLE_SQL)

    for i in range(FIRST_PAGE, LAST_PAGE_EXCLUSIVE):
        params['page'] = i
        resp = requests.get(
            base_url,
            headers=headers,
            params=params,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        status_code = resp.status_code
        if status_code != 200:
            # An error page is not the JSON payload; skip it instead of crashing.
            print('page', i, 'skipped: HTTP status', status_code)
            continue

        comments_json = resp.text
        print(comments_json)

        # Alternatives would be slicing or a regex to strip a JSONP wrapper;
        # this endpoint is expected to return plain JSON directly.
        try:
            comments_obj = json.loads(comments_json)
        except ValueError:
            print('page', i, 'skipped: response is not valid JSON')
            continue
        print(comments_obj)

        comments = comments_obj.get('comments') or []
        for c in comments:
            cid = c['id']
            content = c['content']
            # NOTE(review): the source text shows all-lowercase keys, which
            # looks like an extraction artefact; both casings are accepted
            # here. Confirm the real key names against a live response.
            creation_time = _field(c, 'creationtime', 'creationTime')
            product_color = _field(c, 'productcolor', 'productColor')
            product_size = _field(c, 'productsize', 'productSize')
            print('-' * 100)
            print(cid, content)
            cursor.execute(
                INSERT_SQL,
                [cid, content, creation_time, product_color, product_size],
            )
        # Commit after each page so inserts and updates are persisted.
        connect.commit()

    cursor.execute(""" select * from comment;
""")
    # Fetch the query result.
    rs = cursor.fetchall()
    print(rs)
finally:
    # Always release the cursor and the database connection.
    cursor.close()
    connect.close()

# Build a word cloud image from the most recent comments stored in the
# SQLite database.
import sqlite3

import jieba
import numpy as np
import PIL.Image as Image
from wordcloud import WordCloud

# TODO: the output file name passed to to_file() is missing from the source
# text; this placeholder must be confirmed or replaced.
OUTPUT_IMAGE_PATH = './wordcloud.png'

connect = sqlite3.connect('../l05/jingdongsqlite.db')
cursor = connect.cursor()
try:
    cursor.execute(
        """select * from comment order by creation_time desc limit 0,499;"""
    )
    comments_rs = cursor.fetchall()
finally:
    # The rows are already in memory, so the database can be released now.
    cursor.close()
    connect.close()

# Column 1 of each row is the comment text; NULL content is treated as empty.
comments = ''.join(c[1] or '' for c in comments_rs)

# Precise-mode segmentation (cut_all=False).
words = jieba.cut(comments, cut_all=False)
comment_words_list = list(words)

with open('../l05/dict/stop_words_zh.txt', mode='r', encoding='utf-8') as f:
    # A set gives constant-time membership tests in the filter below.
    stop_words = set(f.read().splitlines())

filtered_comment_word_list = [
    word for word in comment_words_list if word not in stop_words
]

comment_words_str = ' '.join(filtered_comment_word_list)
print(comment_words_str)

wc = WordCloud(
    font_path='./問藏書房.ttf',
    background_color='black',
    mask=np.array(Image.open('./三角形.jpg')),
    width=1000,
    height=800,
    max_words=500,
    relative_scaling=0.3,
    min_font_size=50,
).generate(comment_words_str)
wc.to_file(OUTPUT_IMAGE_PATH)

python爬取京東評論

這不是我的第一個爬蟲，但大多數都是像這樣簡單粗暴的，因為一開始對於定義函式，然後再進行相應的操作，是比較困難的，只能直接寫for迴圈語句。然後，我們便開始進行相應的爬蟲。第一步：匯入必要的包 import requests import json。header這個的作用在於偽裝成瀏覽器進行操作，有些網...

京東爬取評論簡單分析

def get comment url i 0 while true url str i pagesize 10 isshadowsku 0 fold 1 headers response requests.get url,headers headers comment list re.compil...

python爬取京東評論一

作為一個爬蟲小白解決問題是十分蛋疼的，就這幾行我折磨了一下午，然後我發現，學習最大的難題是學習資源獲取的途徑並不是本身，只要學，任何人都能學會開發者選項 3.知道了這個是相應的請求了，去headers弄url,去看看怎麼樣複製url 4下面開始搞 import requests imp...

京東評論爬取並寫入sqlite資料庫

python爬取京東評論

京東爬取評論簡單分析

python爬取京東評論 一

相關推薦

python爬取京東評論一