python反反爬,爬取貓眼評分

2022-08-30 08:54:10 字數 4019 閱讀 8773

.
用 FontCreator 開啟 base.woff,檢視編號與數字字型的對應關係。

初始化時將對應關係寫入字典中。

#!/usr/bin/env python
# coding:utf-8

__author__ = "南樓"

import os
import re

import requests
from fontTools.ttLib import TTFont


class maoyan(object):
    """Fetch a Maoyan film page and decode its font-obfuscated star rating.

    The page renders rating digits as HTML entities (``&#xXXXX;``) that map
    to glyphs in a per-request ``.woff`` font.  A reference font
    (``./fonts/base.woff``) whose glyph-to-digit mapping is known is loaded
    once; glyphs of the page font are then compared against the reference
    glyphs to recover the real digits.
    """

    # Directory holding base.woff and the downloaded page fonts.
    FONT_DIR = './fonts'
    # Download prefix, rebuilt from the host/path matched by FONT_FILE_RE.
    # NOTE(review): the original literal was lost in extraction -- confirm scheme.
    FONT_URL_PREFIX = 'https://vfile.meituan.net/colorstone/'
    # Captures the font file name referenced by the page.
    FONT_FILE_RE = r'vfile\.meituan\.net\/colorstone\/(\w+\.woff)'
    # NOTE(review): the HTML tags of this pattern were stripped from the
    # listing (only `\s+(.*?)\s+` survived); the surrounding markup here is an
    # assumption and must be checked against the live page.
    STAR_RE = (r'<span class="index-left info-num ">\s+'
               r'<span class="stonefont">(.*?)</span>\s+</span>')
    # One obfuscated digit, e.g. "&#xf3ba;".
    ENTITY_RE = r'&#[xX]([0-9a-fA-F]+);'
    # NOTE(review): the original header dict was lost in extraction; a
    # browser-like User-Agent is assumed.
    DEFAULT_HEADERS = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/104.0 Safari/537.36'),
    }

    def __init__(self, url='', headers=None):
        """Load the reference font and build the glyph lookup tables.

        Args:
            url: Film page to crawl.  The original value was lost in
                extraction, so it defaults to an empty string and must be
                supplied (or assigned to ``self.url``) before crawling.
            headers: Optional request headers; defaults to a copy of
                ``DEFAULT_HEADERS``.
        """
        self.url = url
        self.headers = dict(self.DEFAULT_HEADERS) if headers is None else headers
        self.base_obj = {}  # glyph name -> glyph object of the reference font

        self.base_font_file = TTFont(os.path.join(self.FONT_DIR, 'base.woff'))

        # glyph name -> digit, read off base.woff with a font editor.
        # Hex digits are upper-case: get_num_from_font_file() upper-cases the
        # entity before building the glyph name.
        self.base_num = {
            "uniF3BA": "0",
            "uniF2A9": "1",
            "uniE6A5": "2",
            "uniF680": "3",
            "uniE69C": "4",
            "uniE710": "5",
            "uniE07D": "6",
            "uniE5A7": "7",
            "uniEC7A": "8",
            "uniE2A3": "9",
        }
        self.baseobj()

    def baseobj(self):
        """Populate and return ``self.base_obj`` from the reference font."""
        glyphs = self.base_font_file['glyf']
        for key in self.base_num:
            self.base_obj[key] = glyphs[key]
        return self.base_obj

    def get_html(self, url):
        """Send a GET request and return the raw response body as bytes.

        Raises:
            requests.RequestException: on network failure, timeout or an
                HTTP error status.
        """
        response = requests.get(url, headers=self.headers, timeout=10)
        response.raise_for_status()
        return response.content

    def create_font(self, re_font_file):
        """Open the page font as ``self.font_file``, downloading it if absent.

        Args:
            re_font_file: File name of the ``.woff`` font referenced by the page.
        """
        font_path = os.path.join(self.FONT_DIR, re_font_file)
        if not os.path.exists(font_path):
            # NOTE(review): the original message text was lost in extraction.
            print('下載字型檔案:', re_font_file)
            font_bytes = self.get_html(self.FONT_URL_PREFIX + re_font_file)
            with open(font_path, 'wb') as f:
                f.write(font_bytes)

        self.font_file = TTFont(font_path)

    def get_num_from_font_file(self, re_star):
        """Translate an obfuscated rating such as ``&#xe5a7;.&#xf3ba;`` to text.

        Every ``&#xXXXX;`` entity is replaced by the digit whose reference
        glyph equals the page-font glyph ``uniXXXX``; all other characters
        (the decimal point) are kept, so multi-digit parts like ``10.0`` work.

        Args:
            re_star: Raw rating text captured from the page.

        Returns:
            The decoded rating as a string, e.g. ``"7.0"``.

        Raises:
            ValueError: if a page glyph matches no reference glyph.
        """
        glyphs = self.font_file['glyf']

        def decode(match):
            glyph_name = 'uni' + match.group(1).upper()
            glyph = glyphs[glyph_name]
            for key, base_glyph in self.base_obj.items():
                if glyph == base_glyph:
                    return self.base_num[key]
            raise ValueError('no reference glyph matches %s' % glyph_name)

        return re.sub(self.ENTITY_RE, decode, re_star)

    def start_crawl(self):
        """Fetch the page, load its font, then decode and print the rating.

        Raises:
            ValueError: if no URL is configured or the page does not contain
                the expected font reference or rating markup.
        """
        if not self.url:
            raise ValueError('maoyan.url is empty; set the film page URL first')
        html = self.get_html(self.url).decode('utf-8')

        # Locate the font file this response was obfuscated with.
        font_match = re.search(self.FONT_FILE_RE, html)
        if font_match is None:
            raise ValueError('no .woff font reference found in the page')
        self.create_font(font_match.group(1))

        # Locate the obfuscated star rating.
        star_match = re.search(self.STAR_RE, html)
        if star_match is None:
            raise ValueError('star rating markup not found in the page')
        star_rating = self.get_num_from_font_file(star_match.group(1))
        print("星級評分:", star_rating)



# Script entry point: crawl the configured film page and print its rating.
if __name__ == '__main__':
    m = maoyan()
    m.start_crawl()


Python爬取貓眼電影

不多說,直接上 import requests import re import random import pymysql import time 連線資料庫 db pymysql.connect host localhost port 3306,user root passwd a db pyt...

爬取貓眼電影

有一份工作需要我列出兩個電影院的每天電影排期資訊,我不想每次都要去貓眼上複製貼上,所以做了個爬蟲。功能:能夠知道每天的電影排期資訊。使用限制:只能在當天使用,不能在前一晚上使用,後面我會再考慮修改。 coding utf 8 import requests import re from bs4 imp...

python爬取貓眼電影排行

完整的 如下在這裡 閒著沒事,把解析html中的正則方法改用了xpath與beautifulsoup,只能說各有各的優點吧。正則的話,提取資訊可以連貫,一次性提取出所有需要的資訊,當然前提是你的正則式子沒有寫錯,所以說正則寫起來相比xpath與beautifulsoup來說要複雜一下,提取出錯後,除...