使用 BeautifulSoup 爬取小說,并整合到 txt 中。
"""
======================
@Auther:CacheYu
@Time:2019/9/16:16:09
======================
"""
# -*- coding:utf-8 -*-
import urllib.request
import urllib.error
import bs4
from bs4 import BeautifulSoup
def readdown(url):
    """Fetch one chapter page and return its title plus body text.

    Args:
        url: Absolute URL of a single chapter page on the novel site.

    Returns:
        str: The chapter title, a newline, then the chapter body text.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        AttributeError: If the expected markup is missing from the page.
    """
    # Use a context manager so the HTTP response/socket is closed promptly
    # instead of being leaked (the original never closed it).
    with urllib.request.urlopen(url) as response:
        soup = BeautifulSoup(response, 'html.parser')
    # The chapter content is wrapped in <table id="tabletxt"> on this site.
    table = soup.find('table', attrs={'id': 'tabletxt'})
    # The <i> element inside the table holds the chapter title.
    title = table.find('i').string
    print(title)  # progress indicator while scraping
    # The chapter body is the first <div class="txt"> inside the table.
    body_divs = table.find_all('div', attrs={'class': 'txt'})
    content = body_divs[0].get_text().strip()
    return title + '\n' + content
# --- Script entry: scrape every chapter linked from the index page and
# --- concatenate them into one text file.
page_url = 'https://www.dushiyanqing.net/book/90/90659/index.html'
book = r'E:\story\誰把風聲聽成離別歌.txt'

soup = BeautifulSoup(urllib.request.urlopen(page_url), 'html.parser')
table = soup.find('table')
if isinstance(table, bs4.element.Tag):
    # Chapter links live in <td class="k4"> cells of the index table.
    tds = table.find_all('td', attrs={'class': 'k4'})
    default_encode = 'utf-8'
    print('開始寫入,請稍等……')
    # BUG FIX: the original opened with mode 'r+', which raises
    # FileNotFoundError when the output file does not yet exist and leaves
    # stale bytes from a previous longer run. 'w' creates and truncates.
    with open(book, 'w', encoding=default_encode) as target_file_writer:
        for td in tds:
            a = td.find('a')
            if a is not None:
                # BUG FIX: the original line read "+ a.get('href')", which
                # discarded the value and left `href` undefined (NameError
                # on the next line). Assign it before use.
                href = a.get('href')
                # NOTE(review): assumes hrefs on the index page are absolute
                # URLs — if they are relative, join them with page_url via
                # urllib.parse.urljoin before fetching. TODO confirm.
                target_file_writer.write(readdown(href))
                # time.sleep(random.randint(5, 10))  # optional throttle
    print('已完成!\n目錄地址為:', book)
?
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
