@async_generator
async def fetch(url, row, loop):
    """Download ``url`` and yield one dict per table row that contains an IP.

    Args:
        url: Address of the page to download.
        row: Record indexed by ``xquery.c`` columns; the values are used as
            XPath expressions (one selecting the table rows, one per field).
        loop: Forwarded to ``get_proxy`` when ``choice_`` is truthy.

    Yields (via ``yield_``):
        Dicts with the keys ``ip``, ``port``, ``http_scheme``, ``timeout``
        and ``validate_time``. Every value is whatever ``extract_first()``
        returned, except ``timeout``, which has the ``'秒'`` unit removed
        when a timeout was extracted and is ``None`` otherwise.

    Side effects:
        Sets ``HEADERS['Referer']`` to ``url`` after the page is parsed, and
        logs an error and re-fetches the same URL when the response status
        is greater than 400.
    """
    async with ClientSession() as ses:
        # A proxy is only requested when the ``choice_`` switch is on.
        pro = await get_proxy(loop) if choice_ else None
        async with ses.get(url, headers=HEADERS, proxy=pro) as res:
            with closing(res) as resp:
                text = await resp.text()
                response = Selector(text)
                tr_list = response.xpath(row[xquery.c.table_tr_query])
                for tr in tr_list:
                    ip = tr.xpath(
                        row[xquery.c.extract_ip_query]).extract_first()
                    port = tr.xpath(
                        row[xquery.c.extract_port_query]).extract_first()
                    http_scheme = tr.xpath(
                        row[xquery.c.extract_type_query]).extract_first()
                    timeout = tr.xpath(
                        row[xquery.c.extract_timeout_query]).extract_first()
                    validate_time = tr.xpath(
                        row[xquery.c.extract_validate_query]).extract_first()
                    if not ip:
                        # Rows without an IP are skipped.
                        continue
                    # extract_first() gives None when the XPath matches
                    # nothing, so only strip the '秒' ("seconds") unit when
                    # a timeout string was actually found.
                    if timeout is not None:
                        timeout = timeout.replace('秒', '')
                    await yield_({
                        'ip': ip,
                        'port': port,
                        'http_scheme': http_scheme,
                        'timeout': timeout,
                        'validate_time': validate_time,
                    })
                # HEADERS is defined outside this function, so this write
                # is seen by every later request that uses it.
                HEADERS['Referer'] = url
            # NOTE(review): status 400 itself is not retried (strict '>'),
            # and the retry below has no attempt limit - confirm both are
            # intended.
            if res.status > 400:
                logging.error(f'{url}:{res.status}')
                await yield_from_(fetch(url, row, loop))
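# "网友评论" ("reader comments") - appears to be a heading left over from the web page this snippet was copied from; not part of the program.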