#!/usr/bin/python
# -*- coding:utf8 -*-
import requests
from lxml import etree
def get_html(url):
    """Search Baidu for ``url`` and return the text shown next to it.

    Despite the parameter name, ``url`` is used as the search keyword
    (the script at the bottom of this file passes an IP address).  The
    first ``<div class="c-row">`` of the result page is flattened to a
    string, and the text that follows the first occurrence of ``url`` in
    it (up to the next occurrence, if any) is taken as the result.

    Args:
        url: Keyword to search for; also the key of the returned dict.

    Returns:
        dict: ``{url: text}`` on success, or an empty dict when the
        request, the parsing or the extraction failed.

    Side effects:
        On failure, appends ``url`` as one line to ``baid_iperror.txt``
        in the current working directory.
    """
    result = {}
    # Let requests build the query string so the keyword is percent-encoded;
    # a list of pairs keeps the parameter order of the original URL.
    query = [('wd', url), ('tn', 'monline_dg'), ('ie', 'utf-8')]
    # Headers sent with the HTTP request so that it looks like it comes
    # from a browser.
    request_headers = {
        'Connection': 'Keep-Alive',
        # The wildcard media range is '*/*'; a bare '/' is not a valid one.
        'Accept': 'text/html, application/xhtml+xml, */*',
        'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    }
    try:
        response = requests.get(
            'https://www.baidu.com/baidu',
            params=query,
            timeout=5,
            headers=request_headers,
        )
        page = etree.HTML(response.text)
        rows = page.xpath('//div[@class="c-row"]')
        # Raises IndexError when there is no matching row, or when the
        # keyword does not occur in the row text; handled below.
        result[url] = rows[0].xpath('string(.)').split(url)[1]
    except Exception:
        # Deliberate best-effort boundary: any failure (network, parsing,
        # missing row) is recorded and reported as an empty result.
        # The file is opened only when needed and always closed; one
        # keyword per line keeps the entries separable.
        with open('baid_iperror.txt', 'a') as error_log:
            error_log.write(url + '\n')
    return result
# Demo: look up one IP address and show the result.
if __name__ == '__main__':
    # Query once and reuse the result instead of issuing three identical
    # network requests.
    result = get_html('45.124.126.72')
    print(type(result))
    for ip in result:
        print(ip)
    # get_html returns an empty dict on failure, so only print the looked-up
    # text when there is an entry.
    if result:
        print(list(result.items())[0][1])
# [web-page residue, not code: "Reader comments" section heading]