本文實例為大家分享了python創建單詞詞庫的具體代碼,供大家參考,具體內容如下
基本思路:以COCA兩萬單詞表為基礎,用Python爬取金山詞霸的單詞詞性、詞義與音頻,分別存入SQLite。背單詞的時候根據需要自定義數據的選擇方式。
效果如下:
代碼寫的比較隨意,還請見諒。
創建數據庫(此處未附建表代碼;下方爬蟲腳本寫入的 test 表需包含 id、dc、cx、cy、mp3 五個欄位)
完整代碼,效率不高,不過夠用了
import requests
from bs4 import BeautifulSoup
import re
import traceback
import sqlite3
import time
import sys
def ycl(word, sound_dir="E:\\sound\\", db_path="word.db"):
    """Scrape one word from iciba.com, save its audio and store its senses.

    For each part of speech listed on the word's page, one row
    ``(word, part_of_speech, meanings, mp3_path)`` is inserted into the
    ``test`` table. The pronunciation clip is downloaded once per word.

    Any failure (network, page layout, file system, database) is reported on
    stdout and the word is appended to ``log.txt`` so it can be retried; no
    exception propagates to the caller.

    Args:
        word: The word to look up.
        sound_dir: Prefix prepended verbatim to ``word + ".mp3"``; it must
            already end with a path separator.
        db_path: Path of the SQLite database file to write to.
    """
    try:
        url = "http://www.iciba.com/{}".format(word)
        headers = {
            'Host': 'www.iciba.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Accept-Encoding': 'gzip, deflate',
            'Referer': 'http://www.baidu.com',
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
        }
        # A timeout keeps one stalled request from hanging the whole crawl.
        response = requests.get(url=url, headers=headers, timeout=30)
        soup = BeautifulSoup(response.text, "lxml")

        # Part-of-speech labels and their <li> entries, both scoped to the
        # main definition list so the two sequences line up one-to-one.
        base = soup.find(class_='base-list switch_part')
        props = base(class_='prop')
        items = base('li')

        # The audio URL is embedded in the mouse-over handler attribute.
        mp3 = soup.find_all(class_='new-speak-step')[1]
        pattern = re.compile(
            r'http://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+\.mp3'
        )
        matches = pattern.findall(mp3['ms-on-mouseover'])
        if not matches:
            raise ValueError("no mp3 url found for {!r}".format(word))
        r = requests.get(matches[0], timeout=30)
        # Do not save an HTTP error page as if it were audio.
        r.raise_for_status()

        mp3path = sound_dir + word + ".mp3"
        with open(mp3path, 'wb') as f:
            f.write(r.content)

        # NOTE(review): rows go into table `test`, while the quiz script in
        # this article reads from `wordinfo` -- confirm which name is right.
        conn = sqlite3.connect(db_path)
        try:
            cu = conn.cursor()
            for prop, item in zip(props, items):
                c = prop.text
                # All <span> fragments of one sense are joined on one line.
                a = ''.join(span.text for span in item('span'))
                print(word)
                print(c)
                print(a)
                # Placeholders: definitions routinely contain quote characters.
                cu.execute(
                    "INSERT INTO test (id,dc,cx,cy,mp3) VALUES (NULL,?,?,?,?)",
                    (word, c, a, mp3path),
                )
                print('\n')
            # Commit once so a word is stored completely or not at all.
            conn.commit()
        finally:
            conn.close()
    except Exception as e:
        # Best-effort crawl: record the failed word and carry on.
        print(e)
        print("error")
        with open("log.txt", 'a') as f:
            f.write(word + '\n')
def duqudanci(file):
    """Read a word list (one word per line) and crawl each word with ycl().

    Surrounding whitespace (including a trailing ``\\r`` from Windows line
    endings) is removed and blank lines are skipped. A running count of the
    words processed is printed before each lookup.

    Args:
        file: Path of the text file containing the words.
    """
    wordcount = 0
    # The context manager closes the file even if a lookup raises.
    with open(file) as handle:
        for line in handle:
            word = line.strip()
            if not word:
                continue
            wordcount += 1
            print(wordcount)
            ycl(word)
if __name__ == '__main__':
    # Usage: python <script> WORD_LIST_FILE
    if len(sys.argv) < 2:
        sys.exit("usage: python {} WORD_LIST_FILE".format(sys.argv[0]))
    # ycl() opens its own connection for each word; this module-level
    # connection is kept only for backward compatibility.
    conn = sqlite3.connect("word.db")
    cu = conn.cursor()
    try:
        # Crawl every word listed in the file given on the command line.
        duqudanci(sys.argv[1])
        print('下載完成')
        conn.commit()
    finally:
        # Release the database handle even if the crawl aborts.
        conn.close()
自定義背單詞:根據需要可以將單詞放入txt文件中進行測試,可以輸出詞義拼寫單詞,也可以輸出單詞,選擇對應釋義。當然還可以給每個單詞詞義加兩個屬性值,分別表示學習次數和答錯次數,然後可以根據這兩個值來選擇單詞,如果有興趣的話,可以嘗試一下。
import sqlite3
import random
import sys
from playsound import playsound
# 中譯英
def CtoE():
    """Run the Chinese-to-English spelling drill.

    For every id in the module-level ``list1`` the matching ``wordinfo`` row
    is fetched through the module-level cursor ``cu``. The definition is
    printed, the audio clip is played, and the user gets up to three
    attempts to type the word.
    """
    for word_id in list1:
        rows = cu.execute('select id,dc,cx,cy,mp3 from wordinfo where id = ?', (word_id,))
        # Each row is a tuple: (id, word, part of speech, definition, mp3 path).
        for row in rows:
            for attempt in range(1, 4):
                print("當前單詞ID = " + str(row[0]))
                print("釋義:" + row[3])
                # Play the pronunciation before asking for the spelling.
                playsound(row[4])
                typed = input("請拼寫單詞,共有三次機會:")
                if typed == row[1]:
                    print("拼寫正確")
                    break
                print('第%d次拼寫錯誤' % attempt)
            print('\n')
            print("下一個")
            print('\n')
# 英譯中
def EtoC():
    """Run the English-to-Chinese multiple-choice drill.

    For every id in the module-level ``list1`` the matching ``wordinfo`` row
    is fetched through the module-level cursor ``cu``. The word is shown and
    its audio played; the user picks its definition from up to four options
    (the correct one plus up to three random definitions of other rows) and
    has three attempts. The distractors are re-drawn on every attempt.
    """
    for j in list1:
        # Materialise the rows first: the distractor query below reuses the
        # same cursor, which would otherwise reset the result set this loop
        # is iterating over.
        rows = cu.execute(
            'select id,dc,cx,cy,mp3 from wordinfo where id = ?', (j,)
        ).fetchall()
        # Each row is a tuple: (id, word, part of speech, definition, mp3 path).
        for it in rows:
            for attempt in range(1, 4):
                # Up to three random definitions taken from other rows.
                options = [
                    t[0]
                    for t in cu.execute(
                        'select cy from wordinfo where id !=? order by random() limit 3',
                        (j,),
                    )
                ]
                # Slot the correct answer in at a random position.
                options.insert(random.randint(0, len(options)), it[3])
                print("當前單詞ID = " + str(it[0]))
                print("選擇單詞的對應釋義:----" + it[1])
                playsound(it[4])
                # Label only the options that exist, so a table with fewer
                # than three other rows no longer raises IndexError.
                dict1 = dict(zip('ABCD', options))
                for label, meaning in dict1.items():
                    print(label + ":" + meaning + '\n')
                # Lowercase letters are accepted as well.
                answer1 = input("請選擇,共有三次機會(大寫):").strip().upper()
                if dict1.get(answer1) == it[3]:
                    print("正確")
                    break
                print('第%d次選擇錯誤' % attempt)
            print('\n')
            print("下一個")
            print('\n')
def main(file):
    """Load the words to practise and start the chosen drill.

    Each non-blank line of ``file`` is looked up in ``wordinfo`` by its
    ``dc`` column through the module-level cursor ``cu``; the ids of all
    matching rows are appended to the module-level ``list1``. Words with no
    matching row are skipped. The user then chooses between the
    English-to-Chinese drill ("1") and the Chinese-to-English drill ("2").

    Args:
        file: Path of a text file with one word per line.
    """
    # The context manager closes the file once the ids are collected.
    with open(file) as handle:
        for line in handle:
            # strip() also drops a trailing '\r' from Windows line endings.
            word = line.strip()
            if not word:
                continue
            for x in cu.execute('select id from wordinfo where dc = ?', (word,)):
                list1.append(x[0])
    cho = input("英譯中請選1,中譯英請選2:")
    if cho == "1":
        EtoC()
    elif cho == "2":
        CtoE()
    else:
        print("錯誤,請重試")
if __name__ == '__main__':
    # Usage: python <script> WORD_LIST_FILE
    if len(sys.argv) < 2:
        sys.exit("usage: python {} WORD_LIST_FILE".format(sys.argv[0]))
    # `cu` and `list1` are module-level names read by main(), CtoE() and EtoC().
    conn = sqlite3.connect("word.db")
    cu = conn.cursor()
    list1 = []
    try:
        main(sys.argv[1])
        conn.commit()
    finally:
        # Release the database handle even if the drill is interrupted.
        conn.close()
以上就是本文的全部內容,希望對大家的學習有所幫助,也希望大家多多支持腳本之家。
更多文章、技術交流、商務合作、聯繫博主
微信掃碼或搜索:z360901061
微信掃一掃加我為好友
QQ號聯繫:360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然後點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元

