Python 多執行緒佇列爬蟲

python+多執行緒+佇列，爬蟲例子

# -*- coding: utf-8-*-
import urllib2
import urllib
import json
import time
import datetime
import threading
import queue
import sys
# Python 2-only idiom: reloading sys makes sys.setdefaultencoding available
# again, so implicit str/unicode conversions use UTF-8 instead of ASCII.
# NOTE(review): neither call exists on Python 3 - confirm the target runtime.
reload(sys)
sys.setdefaultencoding( "utf-8" )
def get_response(url, retries=3, delay=2):
    """Fetch ``url`` and return the raw response body, retrying on failure.

    Args:
        url: Address to request.
        retries: Maximum number of attempts (defaults to 3).
        delay: Seconds to wait between failed attempts (defaults to 2).

    Returns:
        The data read from the response, or ``None`` when every attempt
        failed.
    """
    for attempt in range(retries):
        try:
            request = urllib2.Request(url)
            response = urllib2.urlopen(request)
            try:
                return response.read()
            finally:
                # Release the connection even when read() raises.
                response.close()
        except Exception as e:
            # Best-effort crawler: report the failure and try again.
            print(e)
            # No point in sleeping once there is no attempt left.
            if attempt < retries - 1:
                time.sleep(delay)
    return None
class threadcity(threading.Thread):
    """Producer thread: resolves cities and pushes them onto a shared queue."""

    def __init__(self, queue_zq_citys):
        """Remember the queue that receives the crawled city records.

        Args:
            queue_zq_citys: Queue shared with the consumer thread.
        """
        threading.Thread.__init__(self)
        self.queue_zq_citys = queue_zq_citys

    def run(self):
        """Enqueue one ``{'cityid', 'cityname'}`` dict per seed row."""
        sql = 'select cityid,cname from table '
        # NOTE(review): `dbhelper` is not imported in the visible part of
        # this file - confirm where it comes from and its exact spelling.
        citylist = dbhelper.sqlhelper.ms.execquery(sql)
        for c in citylist:
            try:
                # Look the city up by its search name.
                # NOTE(review): `result` is never assigned in the visible
                # code; presumably a get_response(<search url for c>) call
                # was lost here. Until it is restored every iteration fails
                # with NameError, which is now reported instead of hidden.
                # Convert the JSON text into a Python object.
                cjson = json.loads(result.decode('gb2312', 'ignore'))
                cityid = cjson["id"]
                cityname = cjson["cname"]
                # Enqueue the record; put() needs the item itself, and the
                # consumer reads the 'cityid' and 'cityname' keys.
                self.queue_zq_citys.put(
                    {'cityid': cityid, 'cityname': cityname})
                time.sleep(1)
            except Exception as e:
                # Keep crawling the remaining cities, but do not hide errors.
                print(e)
class threadcitydb(threading.Thread):
    """Consumer thread: takes city records off the queue and stores them."""

    def __init__(self, queue_zq_citys):
        """Remember the queue that city records are read from.

        Args:
            queue_zq_citys: Queue shared with the producer thread.
        """
        threading.Thread.__init__(self)
        self.queue_zq_citys = queue_zq_citys

    @staticmethod
    def _build_insert_sql(citys):
        """Return the INSERT statement for one ``{'cityid', 'cityname'}`` dict.

        The values come from crawled pages, so the id is forced to an
        integer and single quotes in the name are doubled before they are
        placed in the statement.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``cityid`` is not an integer value.
        """
        # NOTE(review): prefer a parameterized query if the DB helper
        # supports one; its API is not visible from here.
        cityid = int(citys['cityid'])
        cityname = ('%s' % (citys['cityname'],)).replace("'", "''")
        return "insert into table(cityid,cityname) values(%s,'%s')" % (
            cityid, cityname)

    def run(self):
        """Store queued records forever, one blocking ``get()`` at a time."""
        while True:
            # A blocking get() waits for data instead of spinning on empty().
            citys = self.queue_zq_citys.get()
            try:
                if citys is not None:
                    sql = self._build_insert_sql(citys)
                    dbhelper.sqlhelper.ms.execnonquery(sql.encode('utf-8'))
            except Exception as e:
                # One bad record must not stop the consumer; report it.
                print(e)
            finally:
                # Mark the item handled even on failure so that a
                # Queue.join() caller is never left waiting.
                self.queue_zq_citys.task_done()
def main():
    """Run the producer and consumer threads until the queue is drained."""
    try:
        # Queue holding the crawled cities.
        queue_zq_citys = queue.Queue()
        # Create the threads.
        city = threadcity(queue_zq_citys)      # crawler: enqueues cities
        citydb = threadcitydb(queue_zq_citys)  # writer: dequeues into the DB
        # The writer's run() loops forever, so joining it would block for
        # good. Make it a daemon and wait on the queue instead.
        citydb.daemon = True
        # Start the threads.
        city.start()
        citydb.start()
        # Wait for the crawler to finish producing...
        city.join()
        # ...then for every queued item to be marked with task_done().
        queue_zq_citys.join()
    except Exception as e:
        # Report start-up or shutdown failures instead of hiding them.
        print(e)


if __name__ == '__main__':
    main()

python爬蟲多執行緒爬蟲

在進行爬蟲工作的時候，考慮到爬蟲執行的速度慢，那麼怎樣提升爬蟲的速度呢？那就得使用多執行緒爬蟲了。接下來我以糗事百科段子的爬取為例，對多執行緒爬蟲進行概述。github鏈結：鏈結。一、不使用多執行緒爬取糗事百科 1.上 import urllib.request import re headers f...

python多執行緒爬蟲

先記錄一下，普通的糗事百科爬蟲 import urllib.request import re import time import urllib.error headers user agent mozilla 5.0 windows nt 10.0 win64 x64 rv 63.0 gecko...

python多執行緒爬蟲

python多執行緒爬蟲 python單執行緒爬蟲對於應付小規模資料是可以的，但是面對大量資料，我們就要用到多執行緒爬蟲技術。使用多執行緒，一方面可能會加快效率，另一方面可以施加一些小技巧，如不同的執行緒使用不同的 ip，從而避免觸發反爬機制。python 多執行緒 python的多執行緒可以用thr...

Python 多執行緒 佇列爬蟲

python爬蟲 多執行緒爬蟲

python多執行緒爬蟲

python多執行緒爬蟲

相關推薦

Python 多執行緒佇列爬蟲

python爬蟲多執行緒爬蟲