爬取圖片方法一:requests
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
import re
import requests
import os
import shutil
# Download every .jpg linked from one listing page into a fresh ./aabb folder.
suyan_url = 'http://www.xiaohuar.com/s-1-2069.html'
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(suyan_url, timeout=30)
# Fail early on an HTTP error instead of scraping links out of an error page.
response.raise_for_status()
# re.S lets '.' match newlines, so a single href ... class pair may span lines.
lists = re.findall(r'href="(?:.*?)".*?class="(?:.*?)"', response.text, re.S)

# Start from an empty output directory and make it the working directory,
# so the bare filenames below land inside it.
folder = 'aabb'
if os.path.exists(folder):
    shutil.rmtree(folder)
os.mkdir(folder)
os.chdir(folder)

for each in lists:
    # Take the text up to the first space, then drop the leading 'href="'
    # (6 characters) and the final character (presumably the closing quote).
    imgurl = each.split(' ')[0][6:-1]
    if imgurl[-4:] == '.jpg':
        filename = imgurl.split('/')[-1]
        img = requests.get(imgurl, timeout=30)
        if not img.ok:
            # Do not save an HTTP error body under a .jpg name.
            continue
        with open(filename, 'wb') as f:
            f.write(img.content)
爬取圖片方法二:urllib.request
import urllib.request
import os
import random
import shutil
def url_open(url, timeout=30):
    """Fetch *url* with a browser User-Agent and return the raw response body.

    Requests using the ``http`` scheme are routed through a proxy picked at
    random from a hard-coded list. The opener is also installed globally via
    ``urllib.request.install_opener``, as the original implementation did.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            cannot be completed.
    """
    req = urllib.request.Request(url)
    # Pretend to be a desktop browser.
    req.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36')
    proxies = ['124.207.82.166:8008', '218.89.14.142:8060', '49.64.86.43:8080', '101.231.104.82:80']
    proxy = random.choice(proxies)
    # Route http requests through the chosen proxy.
    proxy_support = urllib.request.ProxyHandler({'http': proxy})
    opener = urllib.request.build_opener(proxy_support)
    urllib.request.install_opener(opener)
    # The context manager closes the connection even if read() raises; the
    # timeout stops an unresponsive proxy from blocking forever.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return response.read()
def get_page(url):
    """Return the text that follows the 'current-comment-page' marker on *url*.

    The page is fetched with ``url_open`` and decoded as UTF-8. The returned
    slice starts 23 characters after the start of the marker and ends just
    before the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The extracted text as a ``str`` (the caller converts it to ``int``).

    Raises:
        ValueError: If the marker or the closing ``]`` is not present.
    """
    html = url_open(url).decode('utf-8')
    marker_pos = html.find('current-comment-page')
    if marker_pos == -1:
        # Without this check find() returns -1 and an arbitrary slice of the
        # page would be returned instead of a page number.
        raise ValueError('current-comment-page marker not found in ' + url)
    # 23 = the 20-character marker plus the 3 characters after it
    # (presumably '">[' - confirm against the live markup).
    start = marker_pos + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError('closing ] not found after page marker in ' + url)
    print(url)
    return html[start:end]
def find_imgs(url):
    """Collect the .jpg addresses found after ``img src=`` on the page at *url*.

    The page is fetched with ``url_open`` and decoded as UTF-8. Every address
    found is printed before the list is returned.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of image URL strings, in the order they appear in the page.
    """
    # Local import: the file's import block only brings in urllib.request.
    from urllib.parse import urljoin

    html = url_open(url).decode('utf-8')
    img_addrs = []
    a = html.find('img src=')
    while a != -1:
        # Only accept a '.jpg' that appears within 255 characters of the tag.
        b = html.find('.jpg', a, a + 255)
        if b != -1:
            # a + 9 skips 'img src=' and the one character after it
            # (presumably the opening quote); b + 4 keeps the '.jpg' suffix.
            src = html[a + 9:b + 4]
            if src.startswith('//'):
                # Protocol-relative address: add the scheme, as before.
                img_addrs.append('http:' + src)
            else:
                # Absolute or page-relative address: blindly prefixing 'http:'
                # would yield e.g. 'http:http://...', so resolve it instead.
                img_addrs.append(urljoin(url, src))
        else:
            b = a + 9
        a = html.find('img src=', b)
    for each in img_addrs:
        print(each)
    return img_addrs
def save_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into the current working directory.

    Each file is named after the last path segment of its URL, and a running
    counter is printed before each download.

    Args:
        folder: Not used by this function; files are written relative to the
            current working directory. Kept so existing callers still work.
        img_addrs: Iterable of image URL strings.
    """
    for idx, each in enumerate(img_addrs, start=1):
        filename = each.split('/')[-1]
        print('-------' + str(idx))
        # Download before opening the file so that a failed request does not
        # leave an empty file behind.
        img = url_open(each)
        with open(filename, 'wb') as f:
            f.write(img)
def download_mm(folder='HHAA', pages=10):
    """Download the images from the newest *pages* pages of jandan.net/ooxx.

    Args:
        folder: Output directory. It is deleted first if it already exists,
            then recreated and made the current working directory.
        pages: Maximum number of pages to walk, counting down from the page
            number reported by ``get_page``.
    """
    if os.path.exists(folder):
        # Remove an existing folder together with the files inside it.
        shutil.rmtree(folder)
    os.mkdir(folder)  # create the output folder
    os.chdir(folder)  # work inside it so downloads land there
    # Show where files will be written (os.path itself is just the module).
    print(os.getcwd())
    url = 'http://jandan.net/ooxx/'
    newest_page = int(get_page(url))
    for i in range(pages):
        # Offset from the newest page each time; subtracting i cumulatively
        # would visit n, n-1, n-3, n-6, ... and skip pages.
        page_num = newest_page - i
        if page_num < 1:
            # Fewer pages exist than were requested.
            break
        page_url = url + 'page-' + str(page_num) + '#comments'
        print(page_url)
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
if __name__ == '__main__':
    # Run the downloader with no arguments when executed as a script.
    download_mm()
更多文章、技術交流、商務合作、聯繫博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯繫: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然後點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
