Python如何爬取京東的評價信息
模塊:requests,BeautifulSoup
import re
import time
import csv
import requests
from bs4 import BeautifulSoup
def write_a_row_in_csv(data, csv_doc):
    """Append one row to a CSV file.

    Args:
        data: Iterable of field values that make up the row.
        csv_doc: Path of the CSV file. It is opened in append mode, so
            existing rows are kept and a missing file is created.
    """
    # newline='' hands line-ending control to the csv module, as the csv
    # documentation recommends for files passed to csv.writer.
    with open(csv_doc, 'a', newline='') as f:
        csv.writer(f).writerow(data)
# Download the JD search result page, sending a browser User-Agent string,
# and print the HTTP status code of the response.
url = 'https://search.jd.com/Search?keyword=%E5%8D%8E%E4%B8%BAp20&enc=utf-8&suggest=1.def.0.V13&wq=%E5%8D%8E%E4%B8%BA&pvid=f47b5d05bba84d9dbfabf983575a6875'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0"
}
# Without a timeout requests waits indefinitely on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
print(response.status_code)
# Keep a copy of the downloaded page on disk as html.html.
with open('html.html', 'w', encoding='utf8') as f:
    f.write(response.text)
# Start phone.csv from scratch ('w' truncates any previous run) and write
# the header row; product rows are appended to this file later.
with open('phone.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    # Column labels: id, name, price, number of reviews, positive-review rate.
    fields = ('id', '名稱', '價格', '評價人數', '好評率')
    writer.writerow(fields)
# Parse the search result page and, for the first three product entries,
# print name, item link, price and comment link, then fetch the comment
# summary and append one row per product to phone.csv.
soup_all = BeautifulSoup(response.content, 'lxml')
sp_all_items = soup_all.find_all('li', attrs={'class': 'gl-item'})
for soup in sp_all_items[:3]:
    print('-' * 50)
    # The name <div> holds both the product title (<em>) and the item link (<a>).
    name_div = soup.find('div', attrs={'class': 'p-name p-name-type-2'})
    name = name_div.find('em').text
    print('name: ', name)
    item = name_div.find('a')
    # The first run of digits in the item link is used as the product id.
    item_id = re.search(r'(\d+)', item['href']).group()
    print('item: ', item['href'], item_id)
    price = soup.find_all('div', attrs={'class': 'p-price'})
    price_text = price[0].i.string
    print('price:', price_text)
    comment = soup.find_all('div', attrs={'class': 'p-commit'})
    print('comment url:', comment[0].find('a').attrs['href'])
    # Short pause between products before hitting the comment endpoint.
    time.sleep(0.2)
    # The comment request is sent with a referer pointing at the product
    # page (the original author notes the referer has to be added).
    # Separate names are used so the search page's url/headers/response
    # are not overwritten inside the loop.
    comment_url = f'https://sclub.jd.com/comment/productPageComments.action?productId={item_id}&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1'
    comment_headers = {
        "referer": f"https://item.jd.com/{item_id}.html",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0"
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    comment_response = requests.get(comment_url, headers=comment_headers, timeout=10)
    # Keep the latest raw reply on disk; the file is overwritten per product.
    with open('html.json', 'w', encoding='utf8') as f:
        f.write(comment_response.text)
    data = comment_response.json()
    summary = data['productCommentSummary']
    comment_count = summary['commentCount']
    print('評價人數:', comment_count)
    good_rate = summary['goodRate']
    print('好評率:', good_rate)
    # Record the product as one row in the CSV sheet.
    write_a_row_in_csv(('id' + item_id, name, price_text, comment_count, good_rate), 'phone.csv')
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
