from lxml import etree
# Part 1: load the saved page and print two of the text nodes found directly
# under its <div> elements.
# The context manager closes the file even if reading raises.
with open(r'C:\Users\CY\Desktop\xpath.html', 'r', encoding='utf-8') as file:
    html = file.read()
selector = etree.HTML(html)

# Each XPath query is evaluated once and then indexed, instead of re-running
# the same expression for every item. The indices depend on the layout of the
# saved page; an IndexError here means the page has fewer text nodes.
div_texts = selector.xpath('//div/text()')
div1 = div_texts[0].strip()
div2 = div_texts[3].strip()
print(div1, div2)

# Part 2: print three of the text nodes found directly under <ul> elements.
ul_texts = selector.xpath('//ul/text()')
ul1 = ul_texts[0].strip()
ul2 = ul_texts[6].strip()
ul3 = ul_texts[8].strip()
print(ul1, ul2, ul3)

# Part 3: for the links in the first three <li> items of the "title" list
# inside the "works" div, print the link text and then its href.
infos = selector.xpath('//div[@class="works"][1]/ul[@class="title"][1]/li[position()<4]/a')
for info in infos:
    a_text = info.xpath('text()')[0]
    print(a_text)
    a_href = info.xpath('@href')[0]
    print(a_href)
# Part 4: print the link text and href of list links on the ygdy8.com home page.
import requests
from lxml import etree
url = 'http://www.ygdy8.com/'
# NOTE: a browser-style headers dict (Accept, Cookie, Referer, User-Agent, ...)
# used to be sketched here in comments but was never passed to requests.get.
# Pass `headers=` below if the site starts rejecting the default client.

# Bound the wait: without a timeout, requests can block indefinitely on an
# unresponsive server.
req = requests.get(url, timeout=10)
# Override the encoding requests picked so that .text is decoded as GB2312.
req.encoding = 'gb2312'
html = req.text
selector = etree.HTML(html)

# Collect the <a> elements under ul/li inside the "contain" div.
infos = selector.xpath('//div[@class="contain"][1]/ul/li/a')
for info in infos:
    a_text = info.xpath('text()')
    a_href = info.xpath('@href')
    # An anchor with no direct text node or no href yields an empty list;
    # skip it instead of failing with IndexError on [0].
    if not a_text or not a_href:
        continue
    print(a_text[0], a_href[0])
# (Scraped page residue: "网友评论" / "User comments" — not part of the script.)