(現在人工智能非常火爆,很多朋友都想學,但是一般的教程都是為博碩生準備的,太難看懂了。最近發現了一個非常適合小白入門的教程,不僅通俗易懂而且還很風趣幽默。所以忍不住分享一下給大家。
點這裡 https://www.cbedai.net/ialexanderi 可以跳轉到教程。)
在某些應用場景下(比如網絡爬蟲、數據庫操作等 IO 密集型的操作),想要提高 Python 的並發能力,可以使用多線程或者協程。下面對比 Python 單線程、多線程和協程在網絡爬蟲場景下的速度。
一,單線程。
單線程代碼
1 #!/usr/bin/env python
2 # coding:utf8
3 # Author: hz_oracle
4
5 import MySQLdb
6 import gevent
7 import requests
8 import time
9
10
class DbHandler(object):
    """Small wrapper around a MySQLdb connection used to read picture URLs.

    Call db_conn() before get_urls(), and db_close() when finished.
    """

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so get_urls() and db_close() behave predictably
        # when db_conn() was never called or did not succeed.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 if connecting failed (the error is printed).
        """
        try:
            self.conn = MySQLdb.connect(host=self.host, port=self.port, user=self.user,
                                        passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
        except Exception as e:
            # Report the reason instead of failing silently.
            print(u"數據庫連接失敗:%s" % e)
            return 0
        return 1

    def get_urls(self, limitation):
        """Read up to `limitation` values of column `pic` from `picurltable`.

        The number of URLs read is printed.

        Returns:
            A list with the first column of every fetched row, or [] if the
            query failed (the error is printed).
        """
        sql = "select pic from picurltable limit %s"
        try:
            # int() rejects non-numeric limits; the value is then handed to
            # the driver as a parameter rather than formatted into the SQL.
            self.cursor.execute(sql, (int(limitation),))
            urls_list = [row[0] for row in self.cursor.fetchall()]
            print(len(urls_list))
        except Exception as e:
            print(u"數據庫查詢失敗:%s" % e)
            return []
        return urls_list

    def db_close(self):
        """Close the connection if one is open; calling it again is a no-op."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None
            self.cursor = None
43
44
def get_pic(url, timeout=30):
    """Download `url` and save the body as ./picture/<name>.jpg.

    <name> is the second-to-last "/"-separated component of the URL.
    The ./picture/ directory must already exist.

    Args:
        url: address of the picture.
        timeout: seconds passed to requests.get() so that one stalled
            server cannot block the run indefinitely.

    Returns:
        "ok" on success, "" if the download or the write failed.
    """
    try:
        pic_obj = requests.get(url, timeout=timeout).content
    except Exception:
        print(u"圖片出錯")
        return ""
    filename = url.split('/')[-2]
    file_path = "./picture/" + filename + '.jpg'
    try:
        # `with` closes the handle even if the write fails.
        with open(file_path, 'wb') as fp:
            fp.write(pic_obj)
    except IOError as e:
        # A failed write should not abort the remaining downloads.
        print(u"圖片保存失敗:%s" % e)
        return ""
    return "ok"
57
58
def main():
    """Download the pictures one after another and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    connected = db_obj.db_conn()
    try:
        url_list = db_obj.get_urls(100)
    finally:
        # Release the connection as soon as the URLs are in memory.
        if connected:
            db_obj.db_close()
    # A plain loop: get_pic is called for its side effect, and map() would
    # be lazy (download nothing) on Python 3.
    for url in url_list:
        get_pic(url)
    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()
運行結果
100
45.1282339096
download END
單線程情況下,下載100張圖片花了45秒。
再來看多線程的情況下。
#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle
import MySQLdb
import gevent
import requests
import time
import threading
import Queue
# Re-entrant lock; nothing in this listing acquires it.
lock1 = threading.RLock()
# Work queue shared by the listing: DbHandler.get_urls() fills it and
# MyThread.run() takes URLs from it.
url_queue = Queue.Queue()
# Not referenced by the threaded listing, which uses url_queue instead.
urls_list = []
class DbHandler(object):
    """Small wrapper around a MySQLdb connection that queues picture URLs.

    Call db_conn() before get_urls(), and db_close() when finished.
    """

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so get_urls() and db_close() behave predictably
        # when db_conn() was never called or did not succeed.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 if connecting failed (the error is printed).
        """
        try:
            self.conn = MySQLdb.connect(host=self.host, port=self.port, user=self.user,
                                        passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
        except Exception as e:
            # Report the reason instead of failing silently.
            print(u"數據庫連接失敗:%s" % e)
            return 0
        return 1

    def get_urls(self, limitation):
        """Put up to `limitation` values of `picurltable.pic` on url_queue.

        Returns:
            1 on success, 0 if the query failed (the error is printed).
        """
        sql = "select pic from picurltable limit %s"
        try:
            # int() rejects non-numeric limits; the value is then handed to
            # the driver as a parameter rather than formatted into the SQL.
            self.cursor.execute(sql, (int(limitation),))
            for row in self.cursor.fetchall():
                url_queue.put(row[0])
        except Exception as e:
            print(u"數據庫查詢失敗:%s" % e)
            return 0
        return 1

    def db_close(self):
        """Close the connection if one is open; calling it again is a no-op."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None
            self.cursor = None
class MyThread(threading.Thread):
    """Worker thread that downloads a single URL taken from url_queue."""

    # Seconds allowed per request so a stalled server cannot hang a worker.
    REQUEST_TIMEOUT = 30

    def run(self):
        """Take one URL off url_queue, download it and save it under ./picture/.

        The file name is the second-to-last "/"-separated component of the
        URL plus ".jpg". Failures are printed and end this worker only.
        """
        try:
            url = url_queue.get_nowait()
        except Queue.Empty:
            # Fewer URLs than workers: exit instead of blocking forever,
            # which would keep the whole program from finishing.
            return
        try:
            pic_obj = requests.get(url, timeout=self.REQUEST_TIMEOUT).content
        except Exception:
            print(u"圖片出錯")
            return
        filename = url.split('/')[-2]
        file_path = "./picture/" + filename + '.jpg'
        try:
            # `with` closes the handle even if the write fails.
            with open(file_path, 'wb') as fp:
                fp.write(pic_obj)
        except IOError as e:
            print(u"圖片保存失敗:%s" % e)
def main():
    """Download the queued pictures with one thread per URL and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    connected = db_obj.db_conn()
    try:
        db_obj.get_urls(100)
    finally:
        # Release the connection as soon as the URLs are queued.
        if connected:
            db_obj.db_close()
    # One worker per queued URL (at most 100, the limit requested above),
    # so no worker is left waiting on an empty queue.
    workers = [MyThread() for _ in range(url_queue.qsize())]
    for worker in workers:
        worker.start()
    # join() sleeps until each worker ends; polling active_count() in a
    # tight loop would burn CPU while the downloads are running.
    for worker in workers:
        worker.join()
    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()
?
運行結果
15.408192873
download END
?
啟用100個線程發現只要花15秒即可完成任務。100個線程可能不是最優的方案,但較單線程有很明顯的提升。接著再來看協程。
?
?
協程代碼
#!/usr/bin/env python
# coding:utf8
# Author: hz_oracle
import MySQLdb
import requests
import time
import threading
import Queue
from gevent import monkey; monkey.patch_all()
import gevent
class DbHandler(object):
    """Small wrapper around a MySQLdb connection used to read picture URLs.

    Call db_conn() before get_urls(), and db_close() when finished.
    """

    def __init__(self, host, port, user, pwd, dbname):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.pwd = pwd
        self.db = dbname
        # Defined up front so get_urls() and db_close() behave predictably
        # when db_conn() was never called or did not succeed.
        self.conn = None
        self.cursor = None

    def db_conn(self):
        """Open the connection and create a cursor.

        Returns:
            1 on success, 0 if connecting failed (the error is printed).
        """
        try:
            self.conn = MySQLdb.connect(host=self.host, port=self.port, user=self.user,
                                        passwd=self.pwd, db=self.db, charset="utf8")
            self.cursor = self.conn.cursor()
        except Exception as e:
            # Report the reason instead of failing silently.
            print(u"數據庫連接失敗:%s" % e)
            return 0
        return 1

    def get_urls(self, limitation):
        """Read up to `limitation` values of column `pic` from `picurltable`.

        Returns:
            A list with the first column of every fetched row, or [] if the
            query failed (the error is printed).
        """
        sql = "select pic from picurltable limit %s"
        try:
            # int() rejects non-numeric limits; the value is then handed to
            # the driver as a parameter rather than formatted into the SQL.
            self.cursor.execute(sql, (int(limitation),))
            urls_list = [row[0] for row in self.cursor.fetchall()]
        except Exception as e:
            print(u"數據庫查詢失敗:%s" % e)
            return []
        return urls_list

    def db_close(self):
        """Close the connection if one is open; calling it again is a no-op."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None
            self.cursor = None
def get_pic(url, timeout=30):
    """Download `url` and save the body as ./picture/<name>.jpg.

    <name> is the second-to-last "/"-separated component of the URL.
    The ./picture/ directory must already exist.

    Args:
        url: address of the picture.
        timeout: seconds passed to requests.get() so that one stalled
            server cannot block its greenlet indefinitely.

    Returns:
        "ok" on success, "" if the download or the write failed.
    """
    try:
        pic_obj = requests.get(url, timeout=timeout).content
    except Exception:
        print(u"圖片出錯")
        return ""
    filename = url.split('/')[-2]
    file_path = "./picture/" + filename + '.jpg'
    try:
        # `with` closes the handle even if the write fails.
        with open(file_path, 'wb') as fp:
            fp.write(pic_obj)
    except IOError as e:
        # A failed write should not abort the remaining downloads.
        print(u"圖片保存失敗:%s" % e)
        return ""
    return "ok"
def main():
    """Download the pictures with one greenlet per URL and print the elapsed seconds."""
    start_time = time.time()
    db_obj = DbHandler(host='127.0.0.1', port=3306, user='root', pwd='123456', dbname='pic')
    connected = db_obj.db_conn()
    try:
        url_list = db_obj.get_urls(100)
    finally:
        # Release the connection as soon as the URLs are in memory.
        if connected:
            db_obj.db_close()
    # Spawn every download, then wait for all of them to finish.
    # NOTE(review): the downloads presumably overlap only because this
    # listing calls gevent's monkey.patch_all() at import time - confirm.
    jobs = [gevent.spawn(get_pic, url) for url in url_list]
    gevent.joinall(jobs)
    costtime = time.time() - start_time
    print(costtime)
    print("download END")


if __name__ == "__main__":
    main()
?
運行結果
10.6234440804
download END
?
使用協程發現只花了10秒多,也就是三種方法中最快的。
?
總結:
三種方法中,單線程最慢,多線程次之,而協程最快。不過如果對多線程進行優化,也可能變快,這裡不討論。
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061
微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元

