代码:
import re
import pymysql
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Attach to a Chrome instance that is already running with remote debugging
# enabled on 127.0.0.1:9222 rather than configuring a fresh browser session.
option = Options()
# option.add_argument("--headless")
option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
# Pass the options through `options=`; the older `chrome_options=` keyword was
# deprecated by Selenium and is rejected by current releases.
driver = webdriver.Chrome(options=option)
# Shared explicit wait: each wait.until(...) call gives up after 10 seconds
# and raises TimeoutException.
wait = WebDriverWait(driver, 10)
def search(keyword='美食'):
    """Submit a search on the current page and scrape the first result page.

    Types ``keyword`` into the ``#q`` search box, clicks the search form's
    button, waits for the pager's "total" element to appear, and calls
    ``get_product()`` to store the items of the first result page.

    Args:
        keyword: Text typed into the search box. Defaults to '美食', the
            value that was previously hard-coded.

    Returns:
        The text of the pager's "total" element (``main`` extracts the page
        count from it).
    """
    # Retry in a loop instead of recursing on timeout: the attempt is still
    # repeated until it succeeds, but repeated timeouts no longer grow the
    # call stack toward a RecursionError.
    while True:
        try:
            # Named `search_box` so the builtin `input` is not shadowed.
            search_box = wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#q"))
            )
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, "#J_SearchForm > button")
                )
            )
            search_box.clear()
            search_box.send_keys(keyword)
            submit.click()
            total = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     "#mainsrp-pager > div > div > div > div.total")
                )
            )
            get_product()
            return total.text
        except TimeoutException:
            # Any of the waits above (including the one inside get_product)
            # timed out; start the whole search over.
            continue
def next_page(page_n):
    """Jump to result page ``page_n`` via the pager form and scrape it.

    Types the page number into the pager's input box, clicks its submit
    button, waits until the pager's active item shows ``page_n``, and then
    calls ``get_product()`` to store that page's items.

    Args:
        page_n: Page number to navigate to.

    Returns:
        None.
    """
    # Retry in a loop instead of recursing on timeout: the jump is still
    # repeated until it succeeds, but repeated timeouts no longer grow the
    # call stack toward a RecursionError.
    while True:
        # Printed on every attempt, matching the per-call print of the
        # recursive version.
        print("翻页")
        try:
            # Named `page_box` so the builtin `input` is not shadowed.
            page_box = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     "#mainsrp-pager > div > div > div > div.form > input")
                )
            )
            submit = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     "#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit")
                )
            )
            page_box.clear()
            page_box.send_keys(page_n)
            submit.click()
            # The highlighted pager entry must show the requested number
            # before the page is scraped.
            wait.until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR,
                     "#mainsrp-pager > div > div > div > ul > li.item.active > span"),
                    str(page_n),
                )
            )
            get_product()
            return
        except TimeoutException:
            # One of the waits timed out; retry the same page.
            continue
def main():
    """Run the scrape: search, read the page count, then visit every page.

    ``search()`` scrapes page 1 and returns the pager's "total" text; the
    first run of digits in that text is taken as the number of pages, and
    pages 2..total are visited with ``next_page``.

    Raises:
        ValueError: If the pager text contains no digits.
    """
    total_text = search()
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    match = re.search(r'(\d+)', total_text)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "no page count found in pager text: {!r}".format(total_text)
        )
    total_pages = int(match.group(1))
    # Page 1 was already scraped by search().
    for page in range(2, total_pages + 1):
        next_page(page)
def get_product():
    """Parse the items on the current result page and insert them into MySQL.

    Waits until at least one item is present in ``#mainsrp-itemlist``, parses
    ``driver.page_source`` with PyQuery, and inserts one row per item into
    table ``sj`` (columns image, price, deal, title, shop, location). Each
    row is committed on its own, so one failing row does not discard the
    others. Prints "插入成功" or "插入失败" per row.

    Returns:
        None.
    """
    wait.until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, "#mainsrp-itemlist .items .item")
        )
    )
    doc = pq(driver.page_source)
    items = doc("#mainsrp-itemlist .items .item").items()
    print("开始爬取")

    sql = (
        "INSERT INTO sj(image,price,deal,title,shop,location) "
        "VALUES (%s,%s,%s,%s,%s,%s)"
    )
    # NOTE(review): database host and credentials are hard-coded here; they
    # should be moved to configuration or environment variables.
    conn = pymysql.connect(host='qxx2094.cn', user='root', password='qxxmysql',
                           database="test", port=3306)
    # One connection per page, always closed in `finally`; previously the
    # connection was only closed on the failure path.
    try:
        with conn.cursor() as cursor:
            for item in items:
                row = (
                    item.find('.pic .img').attr("src"),
                    item.find('.price').text(),
                    item.find('.deal-cnt').text(),
                    item.find('.title').text(),
                    item.find('.shopname').text(),
                    item.find('.location').text(),
                )
                try:
                    cursor.execute(sql, row)
                    conn.commit()
                    print("插入成功")
                except Exception as exc:
                    # Still best-effort per row, but no longer a bare
                    # `except:` (which also swallowed KeyboardInterrupt and
                    # SystemExit), and the cause is now visible.
                    conn.rollback()
                    print("插入失败", exc)
    finally:
        conn.close()
# Start the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
网友评论