
Crawling Industrial Control System Vulnerabilities with Python


First, here is the link, so you can see whether it suits your taste:

Industrial control system vulnerabilities: http://ics.cnvd.org.cn/

[Figure 1: screenshot of the industrial control system vulnerability listing page on ics.cnvd.org.cn]

As you can see, the page is static HTML, so the problem becomes very simple.

All we need to do is fetch each page with requests and parse the result.
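
Before writing the whole crawler, a quick check (a minimal sketch, not part of the final script) confirms that assumption: fetch the first listing page with requests and count the rows of the vulnerability table with lxml. The tbody id "tr" is the same one the full script below relies on.

import requests
from lxml import etree

# Fetch the first listing page and confirm the vulnerability table is present
# in the raw HTML, i.e. the page is static and needs no JavaScript rendering.
resp = requests.get(
    'http://ics.cnvd.org.cn/?tdsourcetag=s_pctim_aiomsg&max=20&offset=0',
    headers={'User-Agent': 'Mozilla/5.0'},
)
rows = etree.HTML(resp.text).xpath('//tbody[@id="tr"]/tr')
print('rows on the first page:', len(rows))  # expect 20 rows per listing page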

Without further ado, here is the code:

import requests
from urllib.parse import urlencode
from lxml import etree
import pymysql  # imported in the original script but not actually used
import time
import xlwt
import xlrd


def makeurl():
    # Yield the listing-page URLs one by one: each page holds 20 entries,
    # so the offset advances in steps of 20.
    # http://ics.cnvd.org.cn/?tdsourcetag=s_pctim_aiomsg&max=20&offset=0
    baseurl = 'http://ics.cnvd.org.cn/?'
    params = {
        'tdsourcetag': 's_pctim_aiomsg',
        'max': '20'
    }
    for page in range(MAX_PAGE):
        params['offset'] = page * 20
        url = baseurl + urlencode(params)
        print('url is ', url)
        yield url


def get_page_urllist(url):
    headers = {
        'Host': 'ics.cnvd.org.cn',
        'Referer': 'http://ics.cnvd.org.cn/?tdsourcetag=s_pctim_aiomsg&max=20&offset=40',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    return response.text


def parse_urllist(content):
    # Extract the detail-page URL of every vulnerability listed on one page.
    html = etree.HTML(content)
    for li in html.xpath('//tbody[@id="tr"]/tr'):
        yield li.xpath('td/a/@href')[0]


def get_page(url):
    headers = {
        'Host': 'www.cnvd.org.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    return response.text


def parse_page(content, url):
    # For each field, locate the <td> whose text is the field label and read
    # the text of its first following sibling cell.
    html = etree.HTML(content)
    item = {}
    item['url'] = url
    item['標(biāo)題'] = str(html.xpath('//div[@class="blkContainerSblk"]/h1/text()')[0])

    item['CNVD_ID'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="CNVD-ID"]/following-sibling::*[1]//text()')])
    item['公開日期'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="公開日期"]/following-sibling::*[1]//text()')])
    item['危害級(jí)別'] = ''.join([i.strip().replace(' ', '').replace('\r', '').replace('\n', '').replace('\t', '') for i in
                            html.xpath('//tbody/tr/td[text()="危害級(jí)別"]/following-sibling::*[1]//text()')])
    item['影響產(chǎn)品'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="影響產(chǎn)品"]/following-sibling::*[1]//text()')])
    try:
        item['BUGTRAQ_ID'] = ''.join(
            [i.strip() for i in html.xpath('//tbody/tr/td[text()="BUGTRAQ ID"]/following-sibling::*[1]//text()')])
    except:
        item['BUGTRAQ_ID'] = ''
    item['CVE_ID'] = ''.join(
        [i.strip() for i in
         html.xpath('//tbody/tr/td[text()="CVE ID"]/following-sibling::*[1]//text()')]) + ' ' + ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="CVE ID"]/following-sibling::*[1]//@href')])

    item['漏洞描述'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="漏洞描述"]/following-sibling::*[1]//text()')])
    item['漏洞類型'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="漏洞類型"]/following-sibling::*[1]//text()')])
    item['參考鏈接'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="參考鏈接"]/following-sibling::*[1]//text()')])
    item['漏洞解決方案'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="漏洞解決方案"]/following-sibling::*[1]//text()')])
    item['廠商補(bǔ)丁'] = ''.join(
        [i.strip() for i in html.xpath(
            '//tbody/tr/td[text()="廠商補(bǔ)丁"]/following-sibling::*[1]//text()')]) + ' http://www.cnvd.org.cn' + ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="廠商補(bǔ)丁"]/following-sibling::*[1]//@href')])
    item['驗(yàn)證信息'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="驗(yàn)證信息"]/following-sibling::*[1]//text()')])
    item['報(bào)送時(shí)間'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="報(bào)送時(shí)間"]/following-sibling::*[1]//text()')])
    item['收錄時(shí)間'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="收錄時(shí)間"]/following-sibling::*[1]//text()')])
    item['更新時(shí)間'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="更新時(shí)間"]/following-sibling::*[1]//text()')])
    item['漏洞附件'] = ''.join(
        [i.strip() for i in html.xpath('//tbody/tr/td[text()="漏洞附件"]/following-sibling::*[1]//text()')])

    return item


def save_data(index, item, workbook):
    sheet = workbook.get_sheet('sheet1')  # fetch the existing 'sheet1' worksheet
    for col, value in enumerate(item.values()):
        sheet.write(index, col, value)
    workbook.save(filename)
    print('saved successfully')


def excel_prepare(heads):
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('sheet1', cell_overwrite_ok=True)  # create a worksheet named 'sheet1'
    for col, value in enumerate(heads):
        sheet.write(0, col, value)
    return workbook


def urlisexist(url, urlset):
    return url in urlset


def getallurl(filename):
    # Read every URL already saved (column 0, skipping the header row),
    # so previously crawled entries can be skipped on the next run.
    workbook = xlrd.open_workbook(filename)
    sheet1 = workbook.sheet_by_name('sheet1')
    results = sheet1.col_values(0, 1)
    return results


def read_old(filename):
    # Load every row of an existing workbook so it can be carried over.
    workbook = xlrd.open_workbook(filename)
    sheet1 = workbook.sheet_by_name('sheet1')
    alloldset = []
    for index in range(sheet1.nrows):
        alloldset.append(sheet1.row_values(index))
    return alloldset, sheet1.nrows


def save_old(index, olditem):
    sheet = workbook.get_sheet('sheet1')  # fetch the existing 'sheet1' worksheet
    for col, value in enumerate(olditem):
        sheet.write(index, col, value)
    workbook.save(filename)


if __name__ == '__main__':
    # http://ics.cnvd.org.cn/?tdsourcetag=s_pctim_aiomsg&max=20&offset=0

    # delay (in seconds) between consecutive requests
    TIMESLEEP = 0

    filename = '工程控制系統(tǒng)漏洞.xls'

    MAX_PAGE = 96

    heads = ['url',
             '標(biāo)題',
             'CNVD_ID',
             '公開日期',
             '危害級(jí)別',
             '影響產(chǎn)品',
             'BUGTRAQ_ID',
             'CVE_ID',
             '漏洞描述',
             '漏洞類型',
             '參考鏈接',
             '漏洞解決方案',
             '廠商補(bǔ)丁',
             '驗(yàn)證信息',
             '報(bào)送時(shí)間',
             '收錄時(shí)間',
             '更新時(shí)間',
             '漏洞附件']

    try:
        alloldset, length = read_old(filename)
    except:
        # no existing workbook yet: start writing at row 1 (row 0 holds the headers)
        alloldset = []
        length = 1

    workbook = excel_prepare(heads)

    # carry the previously saved rows over into the new workbook
    for index, olditem in enumerate(alloldset):
        save_old(index, olditem)

    try:
        urlset = getallurl(filename)
    except:
        urlset = []

    index = length
    for urlofpage in makeurl():
        pagelistcontent = get_page_urllist(urlofpage)
        for url in parse_urllist(pagelistcontent):
            print('url is >>>', url)
            if not urlisexist(url, urlset):  # skip entries that were already crawled
                time.sleep(TIMESLEEP)
                result = get_page(url)
                item = parse_page(result, url)
                print('item is >>>', item)
                save_data(index, item, workbook)
                index = index + 1

    workbook.save(filename)

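The detail pages are parsed with one XPath pattern used throughout parse_page: locate the <td> whose text equals the field label, then take the text of its first following sibling. A minimal sketch on a hypothetical table snippet (not real CNVD markup) shows how that lookup behaves:

from lxml import etree

# Hypothetical snippet mimicking the label/value rows of a CNVD detail page.
snippet = '''
<table><tbody>
  <tr><td>CNVD-ID</td><td>CNVD-2018-00001</td></tr>
  <tr><td>危害級(jí)別</td><td>高</td></tr>
</tbody></table>
'''
html = etree.HTML(snippet)
# Find the cell whose text is the label, then read its next sibling's text.
value = html.xpath('//tbody/tr/td[text()="CNVD-ID"]/following-sibling::*[1]//text()')
print(''.join(i.strip() for i in value))  # -> CNVD-2018-00001

The same pattern is repeated for every field, which is why parse_page is long but uniform.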

If anything is unclear, ask in the comments below.

