2018-07-11
Author: 加勒比海带_4bbc | Published 2018-07-11 17:01
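
This post is a small end-to-end scraping exercise: it fetches the daily agricultural price tables from hfzgncp.com.cn with requests and BeautifulSoup, writes each day's table to an .xlsx file, reads one product's average price back out with xlrd, and draws a ten-day trend line with pyecharts.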

    # coding=utf-8
    import requests
    from bs4 import BeautifulSoup
    import re
    import pymongo   # only needed by the commented-out MongoDB section
    import json      # only needed by the commented-out MongoDB section
    import pandas as pd
    import numpy as np
    import xlrd
    import datetime
    from pyecharts import Line, Grid, EffectScatter, Overlap
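
A note on versions: these imports target the pyecharts 0.x API, where chart classes such as Line and Grid live at the package root. In pyecharts 1.x and later the classes moved to pyecharts.charts and axis settings are configured through pyecharts.options, so the plotting code below would need porting to run on a current install.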

    def onedayPriceSpider(spiderDay):
        # Scrape the price table published on spiderDay and record the
        # average price of '草鱼' (grass carp) into the global lists.
        for m in range(1, 4):  # the day's listing spans up to three pages
            url = ("http://www.hfzgncp.com.cn/index.php?m=content&c=index"
                   "&a=lists&catid=59&sendtime=" + str(spiderDay) + "&page=" + str(m))
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) ' +
                                     'Gecko/20100101 Firefox/61.0'}
            r = requests.get(url, headers=headers)
            html = r.text.encode('utf_8').decode()
            soup = BeautifulSoup(html, "lxml")
            table = soup.find('table', attrs={'class': 'h_list_table r_list_table'})

            # Number of data rows (tr elements containing at least one td)
            height = len(table.findAll(lambda tag: tag.name == 'tr' and
                                                   len(tag.findAll('td')) >= 1))
            if height <= 0:
                return
            print('height:' + str(height))

            # Collect the header cells
            columns = [x.text for x in table.tr.findAll('th')]
            columns = [x.replace('\xa0', '') for x in columns]

            # Build a DataFrame to hold the table
            width = len(columns)  # number of header columns
            df = pd.DataFrame(data=np.full((height, width), '', dtype='U'),
                              columns=columns)
            rows = [row for row in table.findAll('tr') if row.find('td') is not None]

            # Walk the table row by row
            for i in range(len(rows)):
                cells = rows[i].findAll('td')
                if len(cells) == width:
                    # strip spaces and newlines
                    df.iloc[i] = [cell.text.replace(' ', '').replace('\n', '')
                                  for cell in cells]
                else:
                    # short row: right-align the cells that are present
                    w = len(cells)
                    df.iloc[i, width - w:] = [cell.text.replace(' ', '').replace('\n', '')
                                              for cell in cells]

            # The date the data belongs to, e.g. '2018-07-10'
            sendtime = soup.find('input', attrs={'id': 'sendtime'})['value'].rstrip('/-')
            sendtimeStr = re.sub(r"-", "", sendtime)

            # Persist the table, then read it back with xlrd
            outputfilePath = "D:/xlsx/" + sendtimeStr + ".xlsx"
            df.to_excel(outputfilePath)
            xlsx_data = xlrd.open_workbook(outputfilePath)
            xlsx_table = xlsx_data.sheet_by_name(u'Sheet1')

            # Locate the '平均价' (average price) column and the '草鱼' row
            columnIndex = None
            rowIndex = None
            for j in range(xlsx_table.ncols):
                for i in range(height):
                    if xlsx_table.cell_value(i, j) == '平均价':
                        columnIndex = j
                        break
                    if xlsx_table.cell_value(i, j) == '草鱼':
                        rowIndex = i
                        break

            if rowIndex is None or columnIndex is None:
                continue  # product not on this page, try the next one

            print(xlsx_table.cell_value(rowIndex, columnIndex))
            price_data.append(xlsx_table.cell_value(rowIndex, columnIndex))
            price_date.append(sendtime)
            return  # found the day's price, no need to fetch further pages

        # Optional: push every table row into MongoDB (left commented out)
        #client = pymongo.MongoClient()
        # get a database
        #db = client.priceSpider
        # create or get a collection
        #account = db.prcie
        #data = xlrd.open_workbook("D:/" + sendtimeStr + ".xlsx")
        #table = data.sheets()[0]
        # use the first Excel row as the MongoDB field names
        #rowstag = table.row_values(0)
        #nrows = table.nrows
        #print('-------------nrows----------------' + str(nrows))
        #ncols = table.ncols
        #returnData = {}
        #for i in range(1, nrows):
        #    # zip field names with each row's values and dump to JSON...
        #    returnData[i] = json.dumps(dict(zip(rowstag, table.row_values(i))))
        #    # ...then decode to restore plain serializable values
        #    returnData[i] = json.loads(returnData[i])
        #    print(returnData[i])
        #    account.insert(returnData[i])

    def getColumnIndex(table, columnName):
        # Return the index of the column whose header cell (row 0) equals
        # columnName, or None if no such column exists.
        columnIndex = None
        for i in range(table.ncols):
            if table.cell_value(0, i) == columnName:
                columnIndex = i
                break
        return columnIndex
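
getColumnIndex is only exercised by the commented-out test lines at the bottom of the script; a quick sketch of how it would be used (the workbook path is hypothetical, any file produced by onedayPriceSpider works):

    # Hypothetical usage: find the '平均价' column of a saved workbook
    wb = xlrd.open_workbook("D:/xlsx/20180710.xlsx")
    sheet = wb.sheet_by_name('Sheet1')
    print(getColumnIndex(sheet, '平均价'))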
    

    def getPriceLine(price_date, price_data):
        attr = price_date
        v1 = price_data
        line = Line('价格走势图')
        # Fix the y-axis range to 400-700; in pyecharts 0.x this is passed to
        # add() as yaxis_min/yaxis_max rather than as a raw ECharts yAxis dict.
        line.add('鲫鱼', attr, v1, is_smooth=True,
                 mark_point=['max', 'min'], mark_line=['average'],
                 yaxis_formatter='元', yaxis_min=400, yaxis_max=700)
        grid = Grid()
        grid.add(line, grid_top="10%")
        grid.render('./鲫鱼.html')
        #es = EffectScatter()
        #es.add('', attr, v1, effect_scale=8)  # blinking effect
        #overlap = Overlap()
        #overlap.add(line)  # the line must be added before es
        #overlap.add(es)
        #overlap.render('./line-es01.html')
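
grid.render() writes a standalone HTML file (./鲫鱼.html here) that can be opened directly in a browser. The commented block shows the Overlap pattern from pyecharts 0.x: the base Line must be added before the EffectScatter so the blinking markers are layered on top of the same series.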
    

    if __name__ == "__main__":
        price_data = []  # collected prices, filled by onedayPriceSpider
        price_date = []  # matching dates
        spiderDaynumber = 10  # how many past days to scrape

        for i in range(spiderDaynumber):
            spiderDay = datetime.date.today() - datetime.timedelta(days=i + 1)
            print(spiderDay)
            onedayPriceSpider(spiderDay)
        print(price_data)
        print(price_date)
        getPriceLine(price_date, price_data)
        #xlsfile = 'F:/test_AutoTesting/TestCase/RunList.xlsx'
        #table = readExcelDataByName(xlsfile, 'Sheet1')[0]
        # read the value from the first row
        #testcase_id = table.cell_value(1, getColumnIndex(table, '平均价'))
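
One caveat: the loop walks backwards from yesterday, so price_date is collected newest-first and the chart's x-axis runs in reverse chronological order; calling price_date.reverse() and price_data.reverse() before getPriceLine puts time left-to-right.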
