Python 爬取陽光電影並儲存到 MySQL

2021-08-25 02:57:53 字數 1641 閱讀 9892

import requests

from lxml import etree

import re

import pymysql

# Scrape three listing pages of a movie site, follow every detail link found
# on them, extract a title and a link from each detail page with regular
# expressions, and store each (title, href) pair in the MySQL table
# `dianying`.
#
# NOTE(review): this listing was damaged during extraction. The request
# headers, both URL templates and parts of the regular expressions were
# stripped. The placeholders below keep the script syntactically valid but
# MUST be filled in with the real values before it can fetch anything.

# HTTP request headers (e.g. a browser User-Agent).
# NOTE(review): the original value is missing from the source; empty placeholder.
headers = {}

# Listing-page URL template; `{}` receives the 1-based page number.
# NOTE(review): the original string is empty in the source -- fill in.
list_url_template = ''

# Detail-page URL template; `{}` receives the href scraped from a listing page.
# NOTE(review): the original string is empty in the source -- fill in.
detail_url_template = ''

# Pattern for the link stored in the `href` column.
# NOTE(review): the original literal was unterminated; only the visible part
# is reproduced here -- confirm against the real page markup.
href_pattern = re.compile(r'href="(.*)">')

# Pattern for the movie title.
# NOTE(review): the original pattern survived only as `(.*)`; the <title>
# delimiters are a guess and need to be confirmed against the real page.
title_pattern = re.compile(r'<title>(.*)</title>')

# XPath selecting the detail-page hrefs on a listing page.
detail_link_xpath = '//div[@class="co_content8"]/ul/td/table/tr[2]/td[2]/b/a/@href'

# Parameterized statement: the driver escapes the scraped text, so quotes in
# a title can neither break the statement nor inject SQL.
insert_sql = 'insert into dianying values (null, %s, %s)'

# Connect to the database.
db = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='123456', database='python', charset='utf8')

# Create the cursor.
cursor = db.cursor()

try:
    # Fetch listing pages 1, 2 and 3.
    for p in range(1, 4):
        url = list_url_template.format(p)
        print(url)

        # Request the listing page and force the encoding used for decoding.
        req = requests.get(url, headers=headers)
        req.encoding = 'gb2312'

        # Parse the page and collect the detail-page links.
        html_obj = etree.HTML(req.text)
        html_list = html_obj.xpath(detail_link_xpath)

        for i in html_list:
            # Build the detail-page URL and fetch it with the same encoding.
            url_b = detail_url_template.format(i)
            req = requests.get(url_b, headers=headers)
            req.encoding = 'gb2312'
            response = req.text

            href_match = href_pattern.search(response)
            title_match = title_pattern.search(response)
            if href_match is None or title_match is None:
                # Unexpected page layout: skip this page instead of crashing
                # on `.group(1)` of a failed match.
                print('skip (no match): {}'.format(url_b))
                continue

            href = href_match.group(1)
            title = title_match.group(1)
            print(title)

            cursor.execute(insert_sql, (title, href))
            db.commit()
finally:
    # Release the cursor and the connection even if a request or insert fails.
    cursor.close()
    db.close()

Python爬取貓眼電影

不多說,直接上 import requests import re import random import pymysql import time 連線資料庫 db pymysql.connect host localhost port 3306,user root passwd a db pyt...

Python爬取電影天堂資源

from urllib import request,parse from lxml import etree import requests,re url1 req1 request.request url1 response1 request.urlopen req1 html1 respons...

python爬取貓眼電影排行

完整的 如下在這裡 閒著沒事,把解析html中的正則方法改用了xpath與beautifulsoup,只能說各有各的優點吧。正則的話,提取資訊可以連貫,一次性提取出所有需要的資訊,當然前提是你的正則式子沒有寫錯,所以說正則寫起來相比xpath與beautifulsoup來說要複雜一下,提取出錯後,除...