爬取台灣地區資訊

2021-08-14 15:33:53 字數 3101 閱讀 2631

#檔名:

from selenium import webdriver

from urllib.parse import quote

from bs4 import beautifulsoup as bs

import requests

import sys

import json

def req(url, data, headers, max_retries=3):
    """POST ``data`` to ``url`` and return the response once it is HTTP 200.

    The request is re-sent when the server answers with any other status
    code. The previous implementation retried by calling itself without
    returning the result, so a successful retry was thrown away (the caller
    received ``None``) and a persistently failing endpoint recursed until
    ``RecursionError``. This version retries in a bounded loop and returns
    the first successful response.

    Args:
        url: Target URL of the POST request.
        data: Form payload passed to ``requests.post``.
        headers: HTTP headers passed to ``requests.post``.
        max_retries: Number of additional attempts after the first one.

    Returns:
        The ``requests.Response`` with status code 200, or ``None`` when
        every attempt returned a different status. Callers in this script
        test the result with ``if res:``, so ``None`` is handled.

    Raises:
        requests.RequestException: Network-level errors are not caught
            here and propagate to the caller, as before.
    """
    for _ in range(max_retries + 1):
        res = requests.post(url=url, data=data, headers=headers)
        if res.status_code == 200:
            return res
    return None

# Stage 1: POST to the entry page through req() and read the <button>
# elements of the response, which the script treats as the city list.
#
# NOTE(review): this listing has lost its indentation, the right-hand side
# of several assignments (`headers =`, `data =`, `city_list =`,
# `area_list =`, `road_list =`) and the body of the final `for` loop.
# It is not runnable as shown; restore the missing values from the
# original script before use.

# The endpoint is blank in this listing.
url = ""

headers =

data =

# res= requests.post(url=url,data=data,headers=headers)

res = req(url,data,headers)

# result is later filled as city -> area_list (see `result[city] = ...`).
result = {}

# r_data is not referenced again in the visible code.
r_data = {}

city_list =

area_list =

road_list =

if res:

# NOTE(review): `bs` comes from `from bs4 import beautifulsoup as bs` at the
# top of the file; bs4 exports `BeautifulSoup`, so the lowercase name looks
# like a casing casualty of the listing. TODO confirm.
soup = bs(res.text,'lxml')

# The bare string below is a no-op statement; it reads "get the city list".
'''獲取城市列表'''

# NOTE(review): BeautifulSoup's search method is `find_all` (alias
# `findAll`); all-lowercase `findall` is presumably the same casing issue.
# Any filter arguments after 'button', appear to have been stripped.
buttons = soup.findall('button',)

if buttons:

city_list =

print(len(buttons))

# The loop body is missing from the listing; presumably it collected each
# button's text into city_list. TODO confirm against the original.
for cityname in buttons:

# print('城市列表:',city_list)

# Stage 2: for every city in city_list, POST through req() and read the
# <button> elements of the response as that city's areas; the collected
# list is stored in result[city].
#
# NOTE(review): indentation, the `data =` / `headers =` / `area_list =`
# values and the body of the innermost `if` are missing from this listing.

# Prints "stage two begins".
print("第二階段開始***********************************")

if city_list:

for city in city_list:

# The endpoint is blank in this listing.
url = ""

data =

headers =

# res_area = requests.post(url=url,data=data,headers=headers)

res_area = req(url, data, headers)

if res_area:

area_list =

# print(res_area.text)

soup2 = bs(res_area.text,'lxml')

# NOTE(review): `findall` is presumably `find_all` / `findAll`; confirm.
buttons2 = soup2.findall('button',)

if buttons2:

for area in buttons2:

# '上一步' is the "previous step" (back) button label.
# NOTE(review): `not in` on a string is a substring test, so an empty text
# or any fragment of '上一步' is skipped as well; `!=` may have been
# intended. The branch body is missing from the listing; presumably it
# added area.text to area_list. TODO confirm.
if area.text not in '上一步':

result[city] = area_list

# Road collection: for every (city, area) pair recorded in `result`, POST
# through req() and read the <button> elements of the response as that
# area's roads. Results are nested as road_city[city][area] = road_list.
#
# NOTE(review): indentation, the `road_data =` / `data =` / `headers =` /
# `road_list =` values and the body of the innermost `if` are missing from
# this listing.

road_city = {}

# road_data is only mentioned again in a commented-out print below.
road_data =

# NOTE(review): area_list holds whatever the last stage-2 iteration left in
# it, so this guard reflects only that last city. Confirm whether `result`
# was meant to be tested instead.
if area_list:

# `areals` is not used; the inner loop re-reads result[city] instead.
for city,areals in result.items():

road_area = {}

for area in result[city]:

# The endpoint is blank in this listing.
url = ""

data =

headers =

# road_res = requests.post(url=url,data=data,headers=headers)

road_res = req(url,data,headers)

if road_res:

# print(road_res.text)

soup3 = bs(road_res.text,'lxml')

# NOTE(review): `findall` is presumably `find_all` / `findAll`; confirm.
buttons3 = soup3.findall('button',)

if buttons3:

road_list =

for road in buttons3:

# Skips the '上一步' ("previous step") button. `not in` is a substring
# test here. The branch body is missing from the listing; presumably it
# added road.text to road_list. TODO confirm.
if road.text not in '上一步':

road_area[area] = road_list

# print('城市{}->{}->街道列表{}:'.format(city,area,road_data))

road_city[city] = road_area

# Stage 3: for every road in road_city, POST for the detail page, read three
# fields out of the first matched <div>, nest them as
# item_city[city][area][road] = inf, and write the result as JSON text.
#
# NOTE(review): indentation and the `data =` / `headers =` values are
# missing from this listing, so the exact nesting of the statements below
# (in particular where the file write happens) cannot be read from here.

# Prints "stage three begins".
print('第三階段開始********************===')

# Opened in append mode ('a+') and never closed in the visible code.
# NOTE(review): consider a `with open(...)` block so the handle is released.
f = open(r'c:\users\hobart\desktop\台灣地區資訊.json','a+',encoding='utf-8')

item_city = {}

if road_city:

for city,areainf in road_city.items():

item_area = {}

for area,roadinf in areainf.items():

item_road = {}

for road in roadinf:

# The endpoint is blank in this listing.
url = ""

data =

headers =

# Unlike the earlier stages this calls requests.post directly, not req(),
# so no retry is attempted here.
item_res = requests.post(url=url,data=data,headers=headers)

if item_res:

inf = {}

soup4 = bs(item_res.text, 'lxml')

# NOTE(review): `findall` is presumably `find_all` / `findAll`, and any
# filter arguments after 'div', appear to have been stripped; confirm.
table = soup4.findall('div', )

if table:

# Takes the children of the fourth child of the first matched div.
# Presumably the odd indices below skip whitespace text nodes between
# tags. TODO confirm against the page markup.
detail = table[0].contents[3].contents

# Key reads "store service code".
inf['門市服務代號'] = detail[1].text

# Key reads "address".
inf['位址'] = detail[3].text

# NOTE(review): the key '**' looks masked by the listing; verify the
# original key name.
inf['**'] = detail[5].text

item_road[road] = inf

item_area[area] = item_road

item_city[city] = item_area

# This formats the whole item_city dict into the "city {} done" message,
# whereas the final print below formats `city`.
print('城市{}資訊已完成'.format(item_city))

# NOTE(review): Python's boolean literal is `False`; lowercase `false` is
# an undefined name unless it is defined elsewhere. Presumably another
# casing casualty of the listing.
r = json.dumps(item_city, ensure_ascii=false)

# NOTE(review): if this runs once per city, each write appends the
# cumulative item_city, so the file would hold several concatenated JSON
# objects. Confirm the intended nesting.
f.write(r)

print('城市{}資訊寫完'.format(city))

台灣地區為什麼會丟包高?

台灣地區的伺服器由於距離大陸比較近,在實際使用上出現丟包的可能性比較小,基本都維持在 5% 以下,理論上使用體驗是比較快的,搭建網站也能快速載入頁面。然而現在有不少站長發現自己租用的台灣伺服器的丟包率有點高,如果本身延遲經過測試問題不大而丟包率比較高,那麼受到外部影響的概率相對而言更大,接下來就來簡單介紹一下台灣...

富士康回應台灣地區大裁員傳言 假訊息

DoNews 7 月 30 日訊息(記者 趙晉傑)據台灣媒體報道,富士康母公司鴻海精密預計將在 9 月份裁員 7000 人,同時還要關停台灣生產線。對此,富士康回應稱,該傳聞純屬子虛烏有的錯誤報導。鴻海方面強調,為了做好提質增效降本減存,實現向工業網際網路的人工智慧轉型升級,並配合全球...

帖子資訊爬取

# coding: utf-8 import requests import re ... j dub 小吧主 13 有的人天之驕子,從選秀便是球隊核心;有的人甘作綠葉,一心一意幹好自己的事;有的人能力有限,最後只能被聯盟淘汰;而有的人雖有天賦,但是球隊的處境讓他的天賦無法得到兌現,而他們可能在離開球隊後,便得到...