Scraping Zhaopin (智聯招聘) job listings with Python

2021-08-20 17:38:58

Sharing a scraper I wrote today that collects job listings from Zhaopin, using the requests and re modules. The code is fairly simple and not too difficult:

# -*- coding: utf-8 -*-
# Python 2 script: it relies on itertools.izip, urllib.quote and raw_input.

import requests
import re
from itertools import izip
from json import dumps
from urllib import quote

# The contents of headers were stripped when the post was published;
# a plain browser User-Agent is enough for this page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
}

def write_file(all_info):
    fp = open('info.json', 'a')
    for info in all_info:
        # ensure_ascii=False makes dumps return unicode, so encode it
        # before writing to the byte-oriented file handle
        fp.write(dumps(info, ensure_ascii=False, sort_keys=False,
                       indent=4).encode('utf-8'))
        fp.write('\n')
    fp.close()

def get_html(work, where, page_num=1):
    where = quote(where)  # e.g. 北京
    work = quote(work)
    # The URL literal was stripped from the post; this is the search URL
    # Zhaopin used at the time (jl = city, kw = keyword, p = page number).
    url = ('http://sou.zhaopin.com/jobs/searchresult.ashx'
           '?jl=%s&kw=%s&p=%d') % (where, work, page_num)
    response = requests.get(url, headers=headers).text
    return response

def get_info(response):
    # The HTML inside the original regexes was stripped when the post was
    # published; the patterns below are reconstructions from Zhaopin's old
    # search-result table (zwyx = salary, gzdd = location, gsmc = company,
    # zwmc = job title) and may need adjusting against the live page.
    salary = re.findall(r'<td class="zwyx">(.*?)</td>', response, re.S)
    work_locate = re.findall(r'<td class="gzdd">(.*?)</td>', response, re.S)
    company = re.findall(r'<td class="gsmc"[^>]*>(.*?)</td>', response, re.S)
    work_name = re.findall(r'<td class="zwmc"[^>]*>(.*?)</td>', response, re.S)
    # Detail-page links for the job posting and the company.
    work_paticuler_info = re.findall(
        r'<td class="zwmc"[^>]*>.*?href="(.*?)"', response, re.S)
    company_info = re.findall(
        r'<td class="gsmc"[^>]*>.*?href="(.*?)"', response, re.S)
    # izip evaluates its arguments up front, so rebinding the same names
    # inside the loop is safe here
    for salary, locate, company, work, work_info, company_info in \
            izip(salary, work_locate, company, work_name,
                 work_paticuler_info, company_info):
        one_work_info = dict()  # fresh dict per job so yielded items stay independent
        one_work_info['salary'] = salary
        one_work_info['work_locate'] = locate
        one_work_info['company'] = re.sub(r'<.*?>', '', company)  # strip markup
        one_work_info['work_name'] = re.sub(r'<.*?>', '', work)
        one_work_info['work_paticuler_info'] = work_info
        one_work_info['company_info'] = company_info
        yield one_work_info

def run():
    work = raw_input(u"請輸入你想要查詢的工作:")
    where = raw_input(u"請輸入你要查詢的工作地點:")
    html = get_html(work, where)
    work_info = get_info(html)
    write_file(work_info)

if __name__ == '__main__':
    run()
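
The listing above is Python 2 only (itertools.izip, urllib.quote and raw_input are gone in Python 3). For reference, here is a minimal sketch of the same flow in Python 3; it keeps the same reconstructed search URL and regex patterns, which are assumptions about Zhaopin's old markup rather than guaranteed matches for the current site:

# -*- coding: utf-8 -*-
# Python 3 sketch of the scraper above. SEARCH_URL and the regexes are the
# same reconstructions used earlier, i.e. assumptions about the old markup.
import re
import json
from urllib.parse import quote

import requests

HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
SEARCH_URL = 'http://sou.zhaopin.com/jobs/searchresult.ashx?jl={}&kw={}&p={}'

def get_html(work, where, page_num=1):
    url = SEARCH_URL.format(quote(where), quote(work), page_num)
    return requests.get(url, headers=HEADERS).text

def get_info(response):
    salary = re.findall(r'<td class="zwyx">(.*?)</td>', response, re.S)
    locate = re.findall(r'<td class="gzdd">(.*?)</td>', response, re.S)
    company = re.findall(r'<td class="gsmc"[^>]*>(.*?)</td>', response, re.S)
    name = re.findall(r'<td class="zwmc"[^>]*>(.*?)</td>', response, re.S)
    # zip() is already lazy in Python 3, so it replaces itertools.izip
    for s, l, c, n in zip(salary, locate, company, name):
        yield {
            'salary': s,
            'work_locate': l,
            'company': re.sub(r'<.*?>', '', c),
            'work_name': re.sub(r'<.*?>', '', n),
        }

def run():
    work = input("請輸入你想要查詢的工作:")
    where = input("請輸入你要查詢的工作地點:")
    with open('info.json', 'a', encoding='utf-8') as fp:
        for info in get_info(get_html(work, where)):
            fp.write(json.dumps(info, ensure_ascii=False, indent=4) + '\n')

if __name__ == '__main__':
    run()

Opening info.json in text mode with encoding='utf-8' also removes the manual encode step the Python 2 version needs before writing.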
