python爬蟲實戰 爬取汽車之家上車型價格

2021-08-26 05:51:29 字數 4884 閱讀 9043

import codecs
import random
import time

import pymysql
import pymysql.cursors
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait

# Rows to price. Each row is a dict because the connection below uses
# DictCursor; the scraping loop later adds the price keys to these dicts.
cars = []

# NOTE(review): host, user and passwd are masked in the source; fill in the
# real connection settings before running.
conn = pymysql.connect(
    host='*******',
    charset='utf8',
    user='*******',
    passwd='*****',
    db='mysql',
    cursorclass=pymysql.cursors.DictCursor,
)
try:
    with conn.cursor() as cur:
        cur.execute("use data_etl")
        cur.execute("select distinct(car_id),car_name from user_car_port")
        # Keep the fetched rows instead of only counting them; otherwise
        # `cars` stays empty and nothing is ever scraped.
        cars = list(cur.fetchall())
    print(len(cars))
finally:
    conn.close()

# NOTE(review): passing the driver path positionally matches the Selenium 3
# style used by the source; newer Selenium releases expect a Service object.
driver = webdriver.Chrome('chromedriver.exe')

# Unit characters removed from a price label before numeric parsing.
# The source only strips the Traditional "萬" and "元"; the Simplified "万" is
# stripped as well because the scraped site presumably serves Simplified
# Chinese -- TODO confirm against a live page.
_PRICE_UNIT_TABLE = str.maketrans("", "", "萬万元")


def _parse_price_range(text):
    """Parse a price label such as ``"12.58-18.98萬"`` into ``(low, high)``.

    The label is split on ``"-"``. When it has exactly two parts they are the
    lower and upper bound; otherwise the first part is used for both bounds.

    Raises:
        ValueError: if a bound is not numeric once unit characters are removed.
    """
    parts = text.translate(_PRICE_UNIT_TABLE).split("-")
    low = float(parts[0])
    high = float(parts[1]) if len(parts) == 2 else low
    return low, high


def getcarpriceoffsale(innerhtml):
    """Extract the price range from the price markup of a discontinued model.

    Looks for the first ``<span>`` in ``innerhtml`` and the first ``<strong>``
    inside it, then parses that element's text as a price range.

    Args:
        innerhtml: HTML fragment of the price container.

    Returns:
        ``(low, high)`` as floats, or ``(0.0, 0.0)`` when the expected
        elements are missing, empty, or not numeric.
    """
    print("此車型已經停售!")
    soup = BeautifulSoup(innerhtml, "html.parser")

    # NOTE(review): the source calls end with a trailing comma ("span",) and
    # ("strong",), which suggests an attribute filter was lost -- confirm
    # which span/strong actually carries the price.
    span = soup.find("span")
    if span is None:
        print("**span為空")
        return 0.0, 0.0

    strong = span.find("strong")
    if strong is None:
        print("**strong為空")
        return 0.0, 0.0

    text = strong.text
    if not text.strip():
        print("**欄位為空")
        return 0.0, 0.0

    try:
        return _parse_price_range(text)
    except ValueError:
        print("程式出錯!停售車型")
        return 0.0, 0.0


def getcarpriceonsale(innerhtml):
    """Extract the price range from the price markup of a model still on sale.

    Looks for the first ``<dd>`` in ``innerhtml`` and the first ``<a>`` inside
    it, then parses the link text as a price range.

    Args:
        innerhtml: HTML fragment of the price container.

    Returns:
        ``(low, high)`` as floats, or ``(0.0, 0.0)`` when the expected
        elements are missing or the text is not numeric.
    """
    print("此車型在售")
    soup = BeautifulSoup(innerhtml, "html.parser")

    dd = soup.find("dd")
    if dd is None:
        # The source indexed findall(...)[0], so a missing <dd> ended up in
        # the generic error handler; keep reporting it the same way.
        print("程式出錯!在售車型")
        return 0.0, 0.0

    # NOTE(review): the source call ends with a trailing comma ("a",), which
    # suggests an attribute filter was lost -- confirm the intended link.
    link = dd.find("a")
    if link is None:
        print("此車型暫時無法查詢**")
        return 0.0, 0.0

    try:
        return _parse_price_range(link.text)
    except ValueError:
        print("程式出錯!在售車型")
        return 0.0, 0.0

def getcarprice(carid):
    """Load the page for ``carid`` and return its ``(low, high)`` price range.

    The on-sale layout is tried first: wait up to 5 seconds for an element
    with class ``information-summary``, then parse the ``information-price``
    element. If that wait times out, the discontinued layout is tried by
    waiting up to 5 seconds for an element with class ``car_price``.

    Args:
        carid: identifier appended to the module-level ``url``.

    Returns:
        ``(low, high)`` as floats; ``(0.0, 0.0)`` when neither layout appears
        in time or the price could not be parsed.
    """
    try:
        # NOTE(review): `url` is not defined in the visible source; it must
        # exist at module level (base URL of the car detail page).
        driver.get(url + str(carid))
        WebDriverWait(driver, 5).until(
            ec.presence_of_element_located((By.CLASS_NAME, "information-summary"))
        )
        price_element = driver.find_element(By.CLASS_NAME, "information-price")
        # The DOM property is spelled "innerHTML"; the all-lowercase name used
        # in the source does not name any property.
        return getcarpriceonsale(price_element.get_attribute("innerHTML"))
    except TimeoutException:
        pass  # on-sale layout not found; fall through to the off-sale layout

    try:
        # until() returns the located element, so no second lookup is needed.
        price_element = WebDriverWait(driver, 5).until(
            ec.presence_of_element_located((By.CLASS_NAME, "car_price"))
        )
        return getcarpriceoffsale(price_element.get_attribute("innerHTML"))
    except TimeoutException:
        print("此車型有問題:" + str(carid))
        return 0.0, 0.0

# Value stored for both bounds when no price could be scraped for a car.
PRICE_UNAVAILABLE = 9999

try:
    for car in cars:
        car_id = car["car_id"]
        # Pause a random 1-5 seconds before each page load.
        time.sleep(random.randint(1, 5))
        button, top = getcarprice(car_id)
        if button == 0.0 and top == 0.0:
            # getcarprice returns (0.0, 0.0) when it found no price.
            car["button"] = PRICE_UNAVAILABLE
            car["top"] = PRICE_UNAVAILABLE
        else:
            car["button"] = button
            car["top"] = top
finally:
    # Close the browser even if a page load raises part-way through.
    driver.quit()

Python實戰爬蟲 爬取段子

不管三七二十一我們先導入模組 段子所在的 import re import requests 如果沒這模組執行cmd pip install requests領域 web開發,爬蟲,資料分析,資料探勘,人工智慧 零基礎到專案實戰,7天學習上手做專案 獲取 的內容 段子所在的 import re im...

python爬蟲實戰 爬取豆瓣影評資料

爬取豆瓣影評資料步驟 1 獲取網頁請求 2 解析獲取的網頁 3 提取資料 4 儲存檔案 1 匯入需要的庫 import urllib.request from bs4 import beautifulsoup 隨機數的庫 import random 時間庫 import time 庫 import ...

Python爬蟲實戰之爬取鏈家廣州房價 03儲存

系列目錄 python爬蟲實戰之爬取鏈家廣州房價 01簡單的單頁爬蟲 python爬蟲實戰之爬取鏈家廣州房價 02把小爬蟲變大 這一小節主要講一下前面一直沒有實現的儲存,儲存主要分兩大類 檔案和資料庫。結合這次爬蟲的資料量及後期分析的需要,這次主要介紹sqlite。通過對sqlite資料庫的封裝,處...