When you first learn web scraping in Python, you usually start with the simplest approach. Below are several common basic techniques.
"""
簡單的循環處理
"""
import
requests
url_list = [
"https://www.baidu.com"
,
"https://www.cnblogs.com/"
]
for
url
in
url_list:
result = requests.get(url)
print
(result.text)
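Each iteration above opens a fresh connection. A minimal variation, assuming the same url_list, that reuses connections through requests.Session:

import requests

url_list = ["https://www.baidu.com", "https://www.cnblogs.com/"]

# A Session keeps a connection pool alive across requests to the same host
session = requests.Session()
for url in url_list:
    result = session.get(url)
    print(result.text)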
"""
線程池處理
"""
import
requests
from
concurrent.futures
import
ThreadPoolExecutor
def
fetch_request
(
url
):
result = requests.get(
url
)
print
(result.text)
url_list = [
"https://www.baidu.com/"
,
"https://www.cnblogs.com/"
]
pool = ThreadPoolExecutor(
10
)
for
url
in
url_list:
# 線程池中獲取線程,執行fetch_request方法
pool.submit(fetch_request
,
url)
# 關閉線程池
pool.shutdown()
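ThreadPoolExecutor also works as a context manager, which calls shutdown() for you, and executor.map covers the submit loop in one call. A sketch under the same assumptions:

import requests
from concurrent.futures import ThreadPoolExecutor

def fetch_request(url):
    print(requests.get(url).text)

url_list = ["https://www.baidu.com/", "https://www.cnblogs.com/"]

# Leaving the with-block waits for all tasks, then shuts the pool down
with ThreadPoolExecutor(10) as pool:
    pool.map(fetch_request, url_list)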
"""
線程池+回調函數
"""
import
requests
from
concurrent.futures
import
ThreadPoolExecutor
def
fetch_async
(
url
):
response = requests.get(
url
)
return
response
def
callback
(
future
):
print
(
future
.result().text)
url_list = [
"https://www.baidu.com/"
,
"https://www.cnblogs.com/"
]
pool = ThreadPoolExecutor(
10
)
for
url
in
url_list:
v = pool.submit(fetch_async
,
url)
# 調用回調函數
v.add_done_callback(callback)
pool.shutdown()
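One caveat with this pattern: future.result() re-raises any exception from fetch_async, so a callback that only prints will blow up on a failed request. A defensive callback might look like this (the try/except is an addition, not part of the original):

def callback(future):
    # result() re-raises exceptions from the worker thread, so guard it
    try:
        print(future.result().text)
    except Exception as exc:
        print("request failed:", exc)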
"""
進程池處理
"""
import
requests
from
concurrent.futures
import
ProcessPoolExecutor
def
fetch_requst
(
url
):
result = requests.get(
url
)
print
(result.text)
url_list = [
"https://www.baidu.com/"
,
"https://www.cnblogs.com/"
]
if
__name__ ==
'__main__'
:
pool = ProcessPoolExecutor(
max_workers
=
10
)
for
url
in
url_list:
pool.submit(fetch_requst
,
url)
pool.shutdown()
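The if __name__ == '__main__' guard matters here: under the spawn start method (the default on Windows and on newer macOS Pythons), worker processes re-import this module, and without the guard each import would try to create another pool. Note also that arguments submitted to a process pool must be picklable, since they are serialized over to the workers.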
"""
進程池+回調函數
"""
import
requests
from
concurrent.futures
import
ProcessPoolExecutor
def
fetch_async
(
url
):
response = requests.get(
url
)
return
response
def
callback
(
future
):
print
(
future
.result().text)
url_list = [
"https://www.baidu.com/"
,
"https://www.cnblogs.com/"
]
if
__name__ ==
'__main__'
:
pool = ProcessPoolExecutor(
10
)
for
url
in
url_list:
v = pool.submit(fetch_async
,
url)
v.add_done_callback(callback)
pool.shutdown()
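For I/O-bound crawling, threads are usually the better fit; a process pool pays pickling and startup overhead and mainly helps when parsing is CPU-heavy. When you want the results back instead of printing inside callbacks, concurrent.futures.as_completed yields futures as they finish. A minimal sketch with a thread pool (the same pattern works with ProcessPoolExecutor):

import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

url_list = ["https://www.baidu.com/", "https://www.cnblogs.com/"]

with ThreadPoolExecutor(10) as pool:
    # Map each future back to its URL so results can be labeled
    futures = {pool.submit(requests.get, url): url for url in url_list}
    for future in as_completed(futures):
        print(futures[future], len(future.result().text))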