python3 爬蟲繼續爬筆趣閣

2022-03-11 12:02:52 字數 2238 閱讀 4115

學如逆水行舟,不進則退

今天想看**..找了半天,沒有資源..

只能自己爬了

想了半天……忘記了這個古老的技能

撿了一下 

import requests
from bs4 import BeautifulSoup

# Request settings shared by every HTTP call in this script.
# NOTE(review): the concrete cookie and header values are missing from this
# listing, so both start out empty. Fill them in (for example with the values
# a browser sends to the site) before running the downloader.
cookies = {}
headers = {}

class downloder(object):
    """Download the chapters of one novel from a Biquge novel site.

    Typical use: call ``get_download_url()`` once to collect the chapter
    titles and links from the table-of-contents page, then call
    ``get_contents()`` for each link and ``writer()`` to append the result
    to a text file.

    Every request is sent with the module-level ``headers`` and ``cookies``.

    Attributes:
        server: Site root URL; chapter hrefs are appended to it verbatim.
        target: URL of the novel's table-of-contents page.
        names: Chapter titles, in page order.
        urls: Chapter URLs, parallel to ``names``.
        nums: Number of chapters collected.
    """

    def __init__(self, server='', book_path='/1_1094/'):
        """Create a downloader with no chapters collected yet.

        Args:
            server: Site root URL (scheme and host). The address is not
                present in this listing, so the caller has to supply it.
            book_path: Path of the novel's index page below ``server``.
        """
        self.server = server
        self.target = server + book_path
        self.names = []  # chapter titles
        self.urls = []   # chapter links
        self.nums = 0    # number of chapters

    def get_download_url(self):
        """Collect chapter titles and links from the table-of-contents page.

        Replaces ``names`` and ``urls`` with the anchors found inside
        ``<div id="list">`` and sets ``nums`` to their count, so the three
        attributes stay consistent even when the method is called again.

        Raises:
            ValueError: If ``server`` has not been set.
            IndexError: If the page contains no ``<div id="list">``.
        """
        if not self.server:
            raise ValueError(
                'server URL is not set; pass server=... to downloder()')
        req = requests.get(self.target, headers=headers, cookies=cookies)
        # Parse the raw bytes so BeautifulSoup can detect the page encoding
        # itself instead of trusting the decoded ``req.text``.
        soup = BeautifulSoup(req.content, 'html.parser')
        listing = soup.find_all('div', id='list')[0]
        # Anchors without an href cannot be downloaded, so skip them.
        links = listing.find_all('a', href=True)
        self.names = [link.get_text() for link in links]
        self.urls = [self.server + link['href'] for link in links]
        self.nums = len(links)

    def writer(self, name, path, text):
        """Append one chapter to a UTF-8 text file.

        Args:
            name: Chapter title, written on its own line.
            path: File to append to; created if it does not exist.
            text: Chapter body, either a string or an iterable of strings.
        """
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            # writelines accepts a plain string as well as a list of lines.
            f.writelines(text)
            # Blank line between consecutive chapters.
            f.write('\n\n')

    def get_contents(self, target):
        """Fetch one chapter page and return its body text.

        Args:
            target: Full URL of the chapter page.

        Returns:
            The text inside the page's ``<div id="content">``, with markup
            removed, each text fragment stripped and joined by newlines.

        Raises:
            IndexError: If the page contains no ``<div id="content">``.
        """
        req = requests.get(url=target, headers=headers, cookies=cookies)
        soup = BeautifulSoup(req.content, 'html.parser')
        content = soup.find_all('div', id='content')[0]
        return content.get_text('\n', strip=True)

if __name__ == '__main__':
    # Script entry point: collect the chapter list, then append every
    # chapter, in page order, to a single text file.
    dl = downloder()
    dl.get_download_url()
    print(dl.nums)
    print('')
    for i, (name, url) in enumerate(zip(dl.names, dl.urls)):
        dl.writer(name, '用點.txt', dl.get_contents(url))
        # Progress line: zero-based index of the chapter just written.
        print('第' + str(i) + '')
    print("")

不是什麼難的東西....

筆趣閣小說 python3爬蟲例項

import urllib.request import re from bs4 import beautifulsoup as bs def urlopen url req urllib.request.request url html urllib.request.urlopen req htm...

Python爬蟲 筆趣閣小說爬取

import requests from lxml import etree以 我有百萬技能點 為例,在筆趣閣搜尋進入目錄頁,複製目錄頁url 對目錄頁的每個章節的url進行爬取,分析網頁利用xpath定位每個章節的url然後進行爬取,然後重新構造url。目錄每一章節的url href html e...

初級爬蟲爬取筆趣閣小說

import requests from pyquery import pyquery as pq def get content a response requests.get a response.encoding gbk doc pq response.text text doc conten...