Python爬蟲例項，爬取小說

import pprint
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def get_source(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            whole crawl indefinitely.

    Returns:
        The response body decoded to ``str`` (``Response.text``).

    Raises:
        Exception: If the server answers with any status other than 200.
        requests.exceptions.RequestException: If the request itself fails
            (invalid URL, connection error, timeout).
    """
    r = requests.get(url, timeout=timeout)
    if r.status_code != 200:
        # status_code is an int; it has to be converted explicitly before
        # it can be concatenated with the message text.
        print(str(r.status_code) + "錯誤")
        raise Exception("error")
    return r.text
# Address of the novel's table-of-contents page; its HTML is parsed further
# down to obtain the chapter links.
# NOTE(review): the address is an empty placeholder here. Fill in the real
# catalogue URL before running, otherwise the request below cannot succeed.
url = ""
html = get_source(url)
def parse_title_href(html_source, base_url=""):
    """Extract the chapter links from a table-of-contents page.

    Args:
        html_source: HTML text of the catalogue page.
        base_url: Address the chapter ``href`` values are resolved against
            to obtain complete links. With the default (empty string) the
            ``href`` values are returned unchanged.

    Returns:
        A list of chapter addresses, one per ``<li>`` of the
        ``<ul class="chapter">`` element that contains a link, in page order.

    Raises:
        ValueError: If the page has no ``<ul class="chapter">`` element.
    """
    soup = BeautifulSoup(html_source, "html.parser")

    # All chapters live in the <li> children of the ul with class "chapter".
    chapter_list = soup.find("ul", class_="chapter")
    if chapter_list is None:
        raise ValueError('no <ul class="chapter"> element found in the page')

    # NOTE(review): the statement that filled this list was lost from the
    # source. It is rebuilt as "append the completed link" because the caller
    # uses each element directly as the chapter address - confirm. The link
    # text (chapter name) is not collected; parse_body reads the title from
    # the chapter page itself.
    datas = []
    for li in chapter_list.find_all("li"):
        title_node = li.find("a", href=True)
        if title_node is None:
            # An entry without a link has nothing to crawl.
            continue
        # Complete the chapter link.
        datas.append(urljoin(base_url, title_node["href"]))
    return datas
def parse_body(link_html, out_path=r"e:\dk\滄元圖.txt"):
    """Extract one chapter from its page and append it to the output file.

    The chapter title is read from the div whose class and id are both
    ``nr_title``; the body is read from the div with id ``nr1``. Every
    ``<br>`` inside the body becomes a newline and all other markup is
    dropped. Title and body are appended to *out_path* as UTF-8, followed
    by a blank line.

    Args:
        link_html: HTML text of a chapter page.
        out_path: File the chapter is appended to. The default is a raw
            string so the backslashes of the Windows path are taken
            literally.

    Raises:
        ValueError: If the title div or the body div is missing.
    """
    soup = BeautifulSoup(link_html, "html.parser")

    title_node = soup.find("div", class_="nr_title", id="nr_title")
    body_node = soup.find("div", id="nr1")
    if title_node is None or body_node is None:
        raise ValueError("chapter page is missing the nr_title or nr1 div")

    # get_text() keeps only the text content of the node.
    title = title_node.get_text()

    # Turn line-break tags into real newlines, then let the parser drop the
    # remaining markup. This replaces a hand-written "find '<' ... '>'" loop
    # that never terminated when a '<' had no closing '>'.
    for br in body_node.find_all("br"):
        br.replace_with("\n")
    body = body_node.get_text()

    # Append in binary mode so the newlines are written exactly as built.
    sb = (title + "\n" + body + "\n\n").encode("utf8")
    with open(out_path, "ab") as ob:
        ob.write(sb)
txts = parse_title_href(html)
# Walk the chapter link list back to front, numbering the chapters from 1 in
# the order they are crawled.
for i, link in enumerate(reversed(txts), start=1):
    # Crawl the chapter body.
    # NOTE(review): the fetch-and-parse call was lost from the source and is
    # rebuilt from the two helpers defined above - confirm.
    parse_body(get_source(link))
    print("第" + str(i) + "章爬取結束")

python爬蟲例項之多執行緒爬取小說

之前寫過一篇爬取小說的部落格，但是單執行緒爬取速度太慢了，之前爬取一部小說花了700多秒，1秒兩章的速度有點讓人難以接受。所以弄了個多執行緒的爬蟲。這次的思路和之前的不一樣，之前是一章一章的爬，每爬一章就寫入一章的內容。這次我新增加了一個字典用於存放每章爬取完的內容，最後當每個執行緒都爬取完之後，再將所...

爬蟲之小說爬取

以筆趣閣為例，爬取《一念永恆》這本小說，具體如下 1 from bs4 import BeautifulSoup 2 from urllib import request 3 import requests 4 import re 5 import sys 6 def down this chapter chapt...

爬取小說的簡易python爬蟲

學習一段時間的python之後決定寫些東西，剛好自己喜歡看小說，就想寫一段爬取小說的爬蟲，這裡以筆趣閣的小說為例。我發現筆趣閣每部小說的目錄頁原始碼基本都包含其所有章節的url，所以這段程式是先獲取所有的url，然後逐頁獲取其文字 import requests 這裡以天地霸氣訣為例 import re imp...

Python爬蟲例項，爬取小說

python爬蟲例項之 多執行緒爬取小說

爬蟲之小說爬取

爬取小說的簡易python爬蟲

相關推薦

python爬蟲例項之多執行緒爬取小說