python爬蟲 爬取貓眼電影資料

2021-08-22 08:25:37 字數 2675 閱讀 7797

# 定義一個函式獲取貓眼電影的資料

import requests

def main():
    """Download the Maoyan board page and print its raw HTML."""
    # NOTE(review): the URL literal was blank in the original text. This is
    # presumably the Maoyan TOP100 board page -- confirm before relying on it.
    url = 'https://maoyan.com/board/4'
    # A timeout keeps the script from hanging forever on a stalled connection.
    html = requests.get(url, timeout=10).text
    print(html)


if __name__ == '__main__':
    main()

# 利用正則匹配,獲得我們想要的資訊

"""

< dd >

< i class ="board-index board-index-10">10

< a href = "/films/2760" title = "魂斷藍橋" class ="image-link" data-act="boarditem-click"

data-val="" >

< img src = "" alt = "" class ="poster-default" / >

< img data - src = ""

alt = "魂斷藍橋" class ="board-img" / >< / a >

< div class ="board-item-main" >

< div class ="board-item-content" >

< div class ="movie-item-info" >

< p class ="name" > < a href="/films/2760" title="魂斷藍橋"

data-act="boarditem-click" data-val="" > 魂斷藍橋 < / a > < / p >

< p class ="star" >主演:費雯·麗, 羅伯特·泰勒, 露塞爾·沃特森< / p >

< div class ="movie-item-number score-num" >

< p class ="score" > < i class ="integer" > 9. < / i > < i class ="fraction" > 2 < / i > < / p >

< / div >< / div >< / div >

< / dd >

"""

import re

# One match per <dd> entry, with seven capture groups: rank, poster URL,
# title, cast, release-time text, and the integer and fractional score parts.
# NOTE(review): the pattern in the original text had lost its HTML tags and
# three of its groups; it is rebuilt here from the sample markup and from the
# seven fields the later code reads -- confirm against the live page.
reg = re.compile(
    r'<dd>.*?board-index.*?>(.*?)</i>'
    r'.*?data-src="(.*?)"'
    r'.*?title="(.*?)"'
    r'.*?主演[::](.*?)</p>'  # accept both the ASCII and the full-width colon
    r'.*?releasetime.*?>(.*?)</p>'
    r'.*?integer.*?>(.*?)</i>'
    r'.*?fraction.*?>(.*?)</i>'
    r'.*?</dd>',
    re.S,  # DOTALL: '.' must also match the newlines inside each entry
)
items = re.findall(reg, html)
print(items)

# 迴圈遍歷列表並且把列表轉換為字典

for item in items:
    # Each match is a 7-tuple; unpacking fails loudly if the pattern and this
    # loop ever disagree about the number of captured fields.
    index, image, title, actor, release_time, integer, fraction = item
    # NOTE(review): the dict literal was missing in the original text; the
    # keys below mirror the original variable names.
    dict1 = {
        'index': index,
        'image': image,
        'title': title,
        'actor': actor.strip(),
        'time': release_time.strip(),
        # The score is split across two tags, e.g. "9." and "2".
        'score': integer.strip() + fraction.strip(),
    }
    print(dict1)

# 把獲得的資料儲存在檔案中

import json

# Append the record as one JSON object per line so the file can be read back
# record by record. ensure_ascii=False writes the Chinese text as-is instead
# of as \uXXXX escapes.
with open('result.txt', 'a', encoding='utf-8') as f:
    f.write(json.dumps(dict1, ensure_ascii=False) + '\n')

# 利用迴圈獲取貓眼電影所有資料

def main():
    """Build the URL of each of the ten board pages."""
    for i in range(10):
        # The offset advances in steps of ten: 0, 10, ..., 90.
        # NOTE(review): the base URL was blank in the original text; this is
        # presumably the Maoyan TOP100 board endpoint -- confirm.
        url = 'https://maoyan.com/board/4?offset=' + str(i * 10)

# 最後程式碼整理如下

import json

import re

from time import sleep

import requests

def main():
    """Scrape ten pages of the Maoyan board and append each movie to result.txt.

    Each page is requested with an offset of 0, 10, ..., 90. Every matched
    entry is written to ``result.txt`` as one JSON object per line. The
    function pauses one second after each page request.
    """
    # NOTE(review): the base URL was blank in the original text; this is
    # presumably the Maoyan TOP100 board endpoint -- confirm.
    base_url = 'https://maoyan.com/board/4?offset='

    # Compiled once, outside the page loop. Seven capture groups per <dd>
    # entry: rank, poster URL, title, cast, release-time text, and the
    # integer and fractional parts of the score.
    # NOTE(review): the pattern in the original text had lost its HTML tags
    # and two of its groups; it is rebuilt here from the seven fields the
    # loop reads -- confirm against the live page.
    reg = re.compile(
        r'<dd>.*?board-index.*?>(.*?)</i>'
        r'.*?data-src="(.*?)"'
        r'.*?title="(.*?)"'
        r'.*?主演[::](.*?)</p>'  # accept both the ASCII and full-width colon
        r'.*?releasetime.*?>(.*?)</p>'
        r'.*?integer.*?>(.*?)</i>'
        r'.*?fraction.*?>(.*?)</i>'
        r'.*?</dd>',
        re.S,  # DOTALL: '.' must also match the newlines inside each entry
    )

    with open('result.txt', 'a', encoding='utf-8') as f:
        for i in range(10):
            url = base_url + str(i * 10)
            # A timeout keeps the run from hanging on a stalled connection.
            html = requests.get(url, timeout=10).text

            for item in reg.findall(html):
                index, image, title, actor, release_time, integer, fraction = item
                # NOTE(review): the dict literal was missing in the original
                # text; the keys mirror the original variable names.
                dict1 = {
                    'index': index,
                    'image': image,
                    'title': title,
                    'actor': actor.strip(),
                    'time': release_time.strip(),
                    'score': integer.strip() + fraction.strip(),
                }
                # ensure_ascii=False keeps the Chinese text readable; the
                # newline separates records so the file can be parsed later.
                f.write(json.dumps(dict1, ensure_ascii=False) + '\n')

            # Pause between page requests.
            sleep(1)


if __name__ == '__main__':
    main()

python爬蟲基礎爬取貓眼電影

import requests from requests.exceptions import requestexception from sqlalchemy import create engine from lxml import etree import pandas as pd impor...

Python爬取貓眼電影

不多說,直接上 import requests import re import random import pymysql import time 連線資料庫 db pymysql.connect host localhost port 3306,user root passwd a db pyt...

爬取貓眼電影

有一份工作需要我列出兩個電影院的每天電影排期資訊,我不想每次都要去貓眼上複製貼上。所以做了個爬蟲 功能 能夠知道每天的電影排期資訊 使用限制 只能在當天使用,不能在前一晚上使用,後面我會再考慮修改 coding utf 8 import requests import re from bs4 imp...