from selenium.webdriver.chrome.options import Options
from selenium import webdriver
import re
from xlrd import open_workbook
from openpyxl import load_workbook
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Matches every quoted host pattern in a rule such as: http.host matches "example.com:80"
_HOST_RULE = re.compile(r'http\.host matches "(.*?)"')


def _host_to_url(host):
    """Build the URL to visit for one host pattern, or return None if it is empty.

    The port suffix (everything from the first ':') is dropped. A host that
    starts with '.' is prefixed with 'http://www'; a host containing the
    '{@0}' placeholder has its first four characters removed; anything else
    is prefixed with 'http://'.
    """
    host = host.split(':', 1)[0]  # drop the port information
    if not host:
        return None
    if host[0] == '.':
        return 'http://www' + host
    if '{@0}' in host:
        # NOTE(review): the slice assumes the placeholder is at the very start
        # of the host, as the original comment stated — confirm against data.
        return 'http://' + host[4:]
    return 'http://' + host


def q1(file_name, target_file):
    """Visit the hosts named in a rule sheet and record each page title.

    Reads the sheet at index 2 of ``file_name`` (via xlrd). For every row
    after the first, column 5 holds the rule text, column 1 the application
    name and column 3 the application class (0-based xlrd indices). Rules
    containing '&&' or lacking 'http.host matches' are skipped. Each host
    found in a rule is opened in headless Chrome and the page title is
    written to the active sheet of ``target_file`` (via openpyxl) as:
    rule text, URL, app name, app class, title (columns 1-5).

    Args:
        file_name: path of the workbook holding the rules.
        target_file: path of an existing workbook that receives the results;
            it is saved after every recorded URL so progress survives a crash.

    NOTE(review): the source row index ``i`` is reused as the target row, so
    when one rule lists several hosts only the last one is kept. This layout
    is preserved from the original; confirm whether it is intended.
    NOTE(review): xlrd 2.x only reads .xls files — confirm the installed
    xlrd version can open ``file_name``.
    """
    # Open both workbooks first so a bad path fails before Chrome is started.
    table = open_workbook(file_name).sheet_by_index(2)
    row_count = table.nrows
    print('大类:', table.cell(1, 3).value, '行数:', row_count)

    xlsx = load_workbook(target_file)  # loaded once instead of once per URL
    sheet = xlsx.active

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--headless")
    # "none" makes driver.get() return immediately; the explicit wait below
    # decides when the page is ready enough. Set on the options object rather
    # than by mutating the shared DesiredCapabilities.CHROME dict.
    chrome_options.set_capability("pageLoadStrategy", "none")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        wait = WebDriverWait(driver, timeout=5)
        for i in range(1, row_count):
            rule = table.cell(i, 5).value
            if not isinstance(rule, str):
                continue  # numeric/empty cells cannot hold a rule
            if '&&' in rule or 'http.host matches' not in rule:
                continue

            app_name = table.cell(i, 1).value
            app_class = table.cell(i, 3).value

            for host in _HOST_RULE.findall(rule):
                url = _host_to_url(host)
                if url is None:
                    continue  # empty host pattern: nothing to visit

                started = time.time()
                loaded = False
                try:
                    print(url)
                    driver.get(url)
                    wait.until(EC.presence_of_element_located((By.XPATH, "//title")))
                    loaded = True
                    driver.execute_script("window.stop();")
                except Exception as exc:
                    # Best effort: a host that cannot be loaded is still recorded.
                    print('load failed:', url, exc)
                elapsed = time.time() - started

                # Default to an empty title so a failed load never reuses the
                # title of the previously visited page.
                # NOTE(review): with pageLoadStrategy "none" the previous
                # document may still be present when polling starts — verify
                # that titles are not carried over between hosts.
                title = ''
                if loaded:
                    try:
                        title = driver.title
                    except Exception as exc:
                        print('title read failed:', url, exc)

                print(i, elapsed, rule, url, app_name, app_class, title)

                sheet.cell(row=i, column=1, value=rule)
                sheet.cell(row=i, column=2, value=url)
                sheet.cell(row=i, column=3, value=app_name)
                sheet.cell(row=i, column=4, value=str(app_class))
                sheet.cell(row=i, column=5, value=str(title))
                xlsx.save(target_file)
    finally:
        # Always shut the browser down, even when interrupted.
        driver.quit()
if __name__ == '__main__':
    # Script entry point: read the rules from the task workbook and write
    # the collected page titles into the result workbook.
    task_file, target_file = './任务.xlsx', './mb.xlsx'
    q1(task_file, target_file)
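# Reader comments ("网友评论") — leftover text from the web page this script was copied from; not part of the program.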