# coding=utf-8
import datetime
import json      # only needed by the commented-out MongoDB block below
import re

import numpy as np
import pandas as pd
import pymongo   # only needed by the optional MongoDB code below
import requests
import xlrd      # note: xlrd >= 2.0 dropped .xlsx support; pin xlrd < 2.0
from bs4 import BeautifulSoup
def onedayPriceSpider(spiderDay):
    url = ("http://www.hfzgncp.com.cn/index.php?m=content&c=index&a=lists"
           "&catid=59&sendtime=" + str(spiderDay) + "&page=1")
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) '
                             'Gecko/20100101 Firefox/61.0'}
    r = requests.get(url, headers=headers)
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, "lxml")
    table = soup.find('table', attrs={'class': 'h_list_table r_list_table'})
    # Count the data rows: every <tr> that contains at least one <td>.
    height = len(table.findAll(lambda tag: tag.name == 'tr' and
                               len(tag.findAll('td')) >= 1))
    print('height:' + str(height))
    # Collect the header row and strip non-breaking spaces.
    columns = [x.text.replace('\xa0', '') for x in table.tr.findAll('th')]
    width = len(columns)  # number of header columns
    # Build an empty DataFrame to hold the table.
    df = pd.DataFrame(data=np.full((height, width), '', dtype='U'), columns=columns)
    rows = [row for row in table.findAll('tr') if row.find('td') is not None]
    # Fill the DataFrame row by row, stripping spaces and newlines.
    for i in range(len(rows)):
        cells = rows[i].findAll('td')
        values = [cell.text.replace(' ', '').replace('\n', '') for cell in cells]
        if len(cells) == width:
            df.iloc[i] = values
        else:
            # Short rows (merged leading cells): right-align the values.
            df.iloc[i, width - len(cells):] = values
    # The hidden <input id="sendtime"> holds the date of the data.
    sendtime = soup.find('input', attrs={'id': 'sendtime'})['value'].rstrip('/-')
    sendtimeStr = re.sub('-', '', sendtime)
    outputfilePath = "D:/" + sendtimeStr + ".xlsx"
    df.to_excel(outputfilePath)
    # Re-open the exported sheet and look up the '平均价' (average price)
    # column of the '鲫鱼' (crucian carp) row.
    xlsx_data = xlrd.open_workbook(outputfilePath)
    xlsx_table = xlsx_data.sheet_by_name('Sheet1')
    columnIndex = None
    rowIndex = None
    for j in range(xlsx_table.ncols):
        for i in range(xlsx_table.nrows):
            if xlsx_table.cell_value(i, j) == '平均价':
                columnIndex = j
            elif xlsx_table.cell_value(i, j) == '鲫鱼':
                rowIndex = i
    if rowIndex is not None and columnIndex is not None:
        print(xlsx_table.cell_value(rowIndex, columnIndex))
    # --- Optional draft kept from the original: store the rows in MongoDB ---
    # client = pymongo.MongoClient()
    # db = client.priceSpider              # get (or create) the database
    # account = db.price                   # get (or create) the collection
    # data = xlrd.open_workbook("D:/" + sendtimeStr + ".xlsx")
    # table = data.sheets()[0]
    # rowstag = table.row_values(0)        # first Excel row -> field names
    # nrows = table.nrows
    # print('-------------nrows----------------' + str(nrows))
    # for i in range(1, nrows):
    #     # zip field names with each data row into a dict, then round-trip
    #     # through JSON to normalise the values
    #     returnData = json.loads(json.dumps(dict(zip(rowstag, table.row_values(i)))))
    #     account.insert(returnData)       # insert() is deprecated; see below
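# A hedged, modernised sketch of the MongoDB step commented out above, not
# part of the original flow. It assumes a local mongod instance; insert_many
# replaces the deprecated insert(), and df.to_dict('records') replaces the
# manual zip/JSON round trip.
def saveToMongo(df, dbName='priceSpider', collName='price'):
    client = pymongo.MongoClient()
    collection = client[dbName][collName]
    # One MongoDB document per scraped table row.
    collection.insert_many(df.to_dict('records'))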
def getColumnIndex(table, columnName):
    # Return the index of the column whose header (row 0) equals columnName,
    # or None if the header is not present.
    columnIndex = None
    for i in range(table.ncols):
        if table.cell_value(0, i) == columnName:
            columnIndex = i
            break
    return columnIndex
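# A hedged companion sketch (not in the original script): paired with a row
# lookup, getColumnIndex could replace the manual double loop inside
# onedayPriceSpider. getRowIndex and its searchCol parameter are assumptions;
# searchCol=1 presumes df.to_excel wrote the DataFrame index into column 0
# and the product name into column 1.
def getRowIndex(table, rowName, searchCol=1):
    for i in range(table.nrows):
        if table.cell_value(i, searchCol) == rowName:
            return i
    return None
# Example use:
# col = getColumnIndex(xlsx_table, '平均价')
# row = getRowIndex(xlsx_table, '鲫鱼')
# if col is not None and row is not None:
#     print(xlsx_table.cell_value(row, col))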
# Scrape the most recent spiderDaynumber days, starting from yesterday.
spiderDaynumber = 1
for i in range(spiderDaynumber):
    spiderDay = datetime.date.today() - datetime.timedelta(days=i + 1)
    print(spiderDay)
    onedayPriceSpider(spiderDay)
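# Hedged alternative for the read-back step: xlrd >= 2.0 no longer opens
# .xlsx files, so the exported sheet could instead be read with pandas
# (which uses openpyxl for .xlsx). The path below is illustrative, and the
# header name '品名' (product name) is an assumption about the site's table.
# df = pd.read_excel("D:/20180917.xlsx", index_col=0)
# print(df.loc[df['品名'] == '鲫鱼', '平均价'])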
# Draft kept from the original: read a test-case list with getColumnIndex.
# xlsfile = 'F:/test_AutoTesting/TestCase/RunList.xlsx'
# table = readExcelDataByName(xlsfile, 'Sheet1')[0]
# # read the value in the first data row of the '平均价' column
# testcase_id = table.cell_value(1, getColumnIndex(table, '平均价'))