美文网首页上海Python
LXML爬虫——当日开放式基金净值

LXML爬虫——当日开放式基金净值

作者: 徐_c90e | 来源:发表于2019-05-29 11:10 被阅读0次

    #查询排行榜,获得基金的名称、代号、单位净值

    import requests

    from lxml import etree

    import xlwt

    # URL template for the ranking view that shows fund name + fund code.
    # `{}` is filled with the page number. The original query string had a
    # stray space after `ph=symbol`, which was sent to the server as part of
    # the parameter value; it is removed so this template matches the
    # `per_nav` one below.
    symbol_url_0 = 'http://stock1.sina.cn/dpool/stock_new/v2/ofdata_center.php?yw_type=jzpaihang&of_type=open&ph=symbol&page={}&vt=4'

    # URL template for the ranking view that shows fund name + unit net value.
    per_nav_url_0 = 'http://stock1.sina.cn/dpool/stock_new/v2/ofdata_center.php?yw_type=jzpaihang&of_type=open&ph=per_nav&page={}&vt=4'

    # Fetch page 1 once, only to read the total number of pages.
    symbol_url = symbol_url_0.format('1')
    # Without a timeout, requests waits indefinitely on a stalled connection.
    res = requests.get(symbol_url, timeout=30)
    res.encoding = 'utf-8'
    html = etree.HTML(res.text.encode('utf-8'))

    # The pager text node is parsed as "<something>/<total>页...": the total
    # is whatever sits between the first '/' and the following '页'.
    pager_text = html.xpath('/html/body/div[1]/div/div[3]/form/div/text()[3]')[0]
    pagecount = int(pager_text.split('/')[1].split('页')[0])

    # Parallel result lists, filled by the later passes; entry k of each list
    # describes the same fund.
    name_list = []
    symbol_list = []
    per_nav_list = []

    print('*********获得基金名称*********')

    # Pass 1: walk every ranking page and collect fund name + fund code.
    for page in range(1, pagecount + 1):
        print('第{}页 共{}页'.format(page, pagecount))

        symbol_url = symbol_url_0.format(str(page))
        # Without a timeout, requests waits indefinitely on a stalled connection.
        res = requests.get(symbol_url, timeout=30)
        res.encoding = 'utf-8'
        html = etree.HTML(res.text.encode('utf-8'))
        if html is None:
            # Nothing parseable came back for this page; skip it, as the
            # original did implicitly via its catch-all handler.
            continue

        # Probe up to 20 rows per page. Row i's name is the (i+5)-th <a> and
        # its code is the (i*2+6)-th text node of the listing container.
        for i in range(1, 21):
            try:
                name = html.xpath('/html/body/div[1]/div/div[3]/a[{}]/text()'.format(i + 5))[0].strip()
                symbol = html.xpath('/html/body/div[1]/div/div[3]/text()[{}]'.format(i * 2 + 6))[0].strip()
            except IndexError:
                # The xpath matched nothing, i.e. this row is absent.
                continue

            if not symbol.isalnum():
                # A non-alphanumeric "code" means we ran past the fund rows;
                # stop scanning this page.
                break

            name_list.append(name)
            symbol_list.append(symbol)
            # Placeholder net value, overwritten by the net-value pass when
            # the fund is found there.
            per_nav_list.append(0.000)

    print('*********获得基金净值*********')

    # Pass 2: walk the net-value ranking pages and fill per_nav_list for every
    # fund whose name was collected in pass 1.

    # Map each name to the index of its FIRST occurrence in name_list. This is
    # the same index name_list.index(name) returned, without a linear scan
    # per row.
    index_by_name = {}
    for position, known_name in enumerate(name_list):
        index_by_name.setdefault(known_name, position)

    for page in range(1, pagecount + 1):
        print('第{}页 共{}页'.format(page, pagecount))

        per_nav_url = per_nav_url_0.format(str(page))
        # Without a timeout, requests waits indefinitely on a stalled connection.
        res = requests.get(per_nav_url, timeout=30)
        res.encoding = 'utf-8'
        html = etree.HTML(res.text.encode('utf-8'))
        if html is None:
            # Nothing parseable came back for this page; skip it.
            continue

        # Row i's name is the (i+5)-th <a>; its net value is the (i*2+5)-th
        # text node of the listing container.
        for i in range(1, 21):
            try:
                name = html.xpath('/html/body/div[1]/div/div[3]/a[{}]/text()'.format(i + 5))[0].strip()
                per_nav = html.xpath('/html/body/div[1]/div/div[3]/text()[{}]'.format(i * 2 + 5))[0].strip()
            except IndexError:
                # The xpath matched nothing, i.e. this row is absent.
                continue

            # Discard rows whose amount is not a plain decimal number. The
            # digits-and-dots check alone would accept text such as "1.2.3",
            # so the value must also convert with float().
            if not per_nav.replace('.', '').isdecimal():
                continue
            try:
                nav_value = float(per_nav)
            except ValueError:
                continue

            # Discard funds that were not seen in the name/code pass.
            row = index_by_name.get(name)
            if row is None:
                continue

            print(name, per_nav, '=>', row, name_list[row])

            # Store a float so the list stays numeric, like its 0.000
            # placeholders, instead of mixing str and float entries.
            per_nav_list[row] = nav_value

    print('*********输出全部数据*********')

    # Dump the collected data to an .xls workbook, one row per fund:
    # column 0 = fund code, column 1 = fund name, column 2 = unit net value.
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('当日基金净值表')

    # The three lists are filled in lockstep, so zipping them pairs up the
    # fields of each fund.
    for row, (fund_code, fund_name, fund_nav) in enumerate(
            zip(symbol_list, name_list, per_nav_list)):
        print(fund_code, fund_name, fund_nav)
        sheet.write(row, 0, fund_code)
        sheet.write(row, 1, fund_name)
        sheet.write(row, 2, fund_nav)

    # Raw string: the path value is unchanged, but it no longer relies on
    # "\当" being an unrecognized escape sequence, which newer Python versions
    # warn about.
    book.save(r'D:\当日开放式基金净值表.xls')

    相关文章

      网友评论

        本文标题:LXML爬虫——当日开放式基金净值

        本文链接:https://www.haomeiwen.com/subject/opuetctx.html