script

作者: AliceWanderAI | 来源:发表于2019-03-01 16:52 被阅读0次

#!/usr/bin/python

from selenium import webdriver
import os
import xlwt
import xlrd
import xlutils

# The scheme separator is '//'; a third slash leaves the host part empty.
# The second assignment overrides the first.
url = 'https://zsb.bupt.edu.cn/'

url = 'https://zsb.bupt.edu.cn/list/list.php?p=5_52_1'
def useful(list_pages):
    """Return the page labels that are whole numbers greater than 1.

    Args:
        list_pages: Iterable of strings, e.g. the tokens of a pagination bar.

    Returns:
        A list of the qualifying labels, unchanged and in their original
        order.
    """
    # str.isdecimal() only accepts characters that int() can parse;
    # str.isdigit() also accepts e.g. superscript digits, for which int()
    # raises ValueError.
    return [page for page in list_pages if page.isdecimal() and int(page) > 1]

def save_table(filename, wbk):
    """Copy the table on the current page into a new sheet of ``wbk``.

    Reads the page through the module-level ``browser``.

    Args:
        filename: Name given to the new worksheet.
        wbk: Workbook object providing ``add_sheet``.
    """
    worksheet = wbk.add_sheet(filename, cell_overwrite_ok=True)
    tbody = browser.find_element_by_xpath('html/body/div/div/div/div/table/tbody')
    # Row numbering starts at 1, so row 0 of the sheet is never written here.
    for row_idx, row in enumerate(tbody.find_elements_by_tag_name('tr'), 1):
        for col_idx, cell in enumerate(row.find_elements_by_tag_name('td')):
            worksheet.write(row_idx, col_idx, cell.text)

def get_all_items():
    """Return the entry names listed on the current page.

    Reads the text of the second element with class ``padlr20`` from the
    module-level ``browser``, splits it into lines and keeps every line that
    does not end in a digit.

    Returns:
        A list of strings, in page order.
    """
    containers = browser.find_elements_by_class_name('padlr20')
    lines = containers[1].text.split('\n')
    # Blank lines are skipped first: line[-1] on an empty string raises
    # IndexError.
    return [line for line in lines if line and not line[-1].isdigit()]

def get_current_page_table():
    """Save the table behind every entry listed on the current page.

    For each name returned by ``get_all_items()``, clicks the link whose text
    contains that name, stores the table through ``save_table`` into the
    module-level ``wbk``, then navigates back.
    """
    for entry_name in get_all_items():
        browser.find_element_by_partial_link_text(entry_name).click()
        save_table(entry_name, wbk)
        browser.back()

def get_useful_page():
    """Return the page numbers beyond page 1 shown in the pagination bar.

    Reads the first element with class ``page`` from the module-level
    ``browser`` and filters its whitespace-separated tokens through
    ``useful``.

    Returns:
        A list of page-number strings; empty when no element with class
        ``page`` is found.
    """
    pagers = browser.find_elements_by_class_name('page')
    if not pagers:
        # No pagination element: there is nothing beyond the current page.
        return []
    # split() with no argument also breaks on newlines and repeated spaces,
    # which split(' ') would leave glued to the numbers.
    return useful(pagers[0].text.split())

# Disabled helper: the definition below sits inside a string literal, so
# is_year is NOT defined when the module runs. The disabled code keeps the
# items of list_year for which str.isdigit() is true.
'''
def is_year(list_year):
new_list = []
for item in list_year:
if item.isdigit():
new_list.append(item)
return new_list
'''

# Script body: load the start page, then for each year collect every table
# reachable from the paginated list into one .xls workbook.
browser = webdriver.PhantomJS()

# NOTE(review): this overrides the zsb.bupt.edu.cn URLs assigned near the top
# of the file, while the output folder below is named "bupt" -- confirm which
# site is intended.
url = 'http://baidu.com'

try:
    browser.get(url)
    browser.implicitly_wait(3)

    # Disabled navigation from the home page to the list page:
    # zhaosheng = browser.find_element_by_link_text("招生信息")
    # zhaosheng.click()
    # luqufenshuxian = browser.find_element_by_partial_link_text('分数线')
    # luqufenshuxian.click()

    # The years to scrape come from the range below. A previous lookup of the
    # years on the page called is_year(), which is not defined at runtime and
    # raised NameError before this loop was reached; it has been dropped.
    for y in range(2018, 2019):
        wbk = xlwt.Workbook(encoding='utf-8', style_compression=0)
        year = str(y) + u'\u5e74'
        if y < 2018:
            # Earlier years are opened through their year link first; no
            # navigation is done for 2018.
            go_to_year_page = browser.find_elements_by_partial_link_text(year)
            go_to_year_page[0].click()
        # Read the remaining page numbers before leaving the first page.
        useful_page = get_useful_page()
        get_current_page_table()
        for page in useful_page:
            go_to_page = browser.find_elements_by_link_text(page)
            go_to_page[0].click()
            get_current_page_table()
        wbk.save(r'C:/Users/212683107/yiyi/worklog/w2018/December/4thweek/uni/bupt/'+year+'.xls')
finally:
    # Always end the PhantomJS process, even when scraping fails midway.
    browser.quit()

相关文章

网友评论

      本文标题:script

      本文链接:https://www.haomeiwen.com/subject/xcxjuqtx.html