http://www.hotstar.com/movies/languages/kannada/ 这个页面一开始只显示一部分图片链接,只有当鼠标不断向下滚动,或者一直按住键盘的 DOWN 键时,才会继续加载出新的图片。如果想要获取后续加载出来的链接,就需要模拟鼠标滚动或者模拟按键。下面的代码通过执行 JavaScript 的 window.scrollTo 来模拟向下滚动,直到链接数量不再增加为止。直接上代码:
# coding=utf8
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
from lxml import etree
# Collect the movie links from a lazily-loaded listing page: keep scrolling to
# the bottom until a scroll no longer changes the number of links found.
url = "http://www.hotstar.com/movies/languages/kannada/"

# NOTE(review): PhantomJS support was dropped in Selenium 4 -- this line needs
# an older Selenium release or a different (e.g. headless Firefox/Chrome)
# driver; confirm against the installed version.
driver = webdriver.PhantomJS(executable_path=r"phantomjs")

# Link count observed after each scroll; the last two entries are compared to
# decide when the page has stopped loading new items.
link_len_list = []

try:
    driver.get(url)
    time.sleep(5)  # let the initial page finish rendering

    while True:
        # Jump far past the end of the document to trigger the lazy loader.
        driver.execute_script('window.scrollTo(0,1000000)')
        time.sleep(3)  # wait for the newly requested items to be inserted

        html = driver.page_source
        # Parse from bytes rather than str (presumably so lxml does not
        # reject a source that carries its own encoding declaration).
        html = etree.HTML(html.encode("utf-8", 'ignore'))
        items = html.xpath("//div[contains(@class, 'rec-received')]/div/hs-cards-directive/article/a/@href")

        for item in items:
            print(item)

        link_len = len(items)
        print(link_len)
        link_len_list.append(link_len)

        if len(link_len_list) > 1:
            print(link_len_list[-1])
            print(link_len_list[-2])
            print('----------------')
            # Same count on two consecutive scrolls: treat the page as fully
            # rendered. NOTE(review): a load slower than the 3 s wait would
            # also end the loop early.
            if link_len_list[-1] == link_len_list[-2]:
                print('渲染完成')
                break
finally:
    # Always shut the browser process down, even if loading or parsing fails.
    driver.quit()
网友评论