美文网首页
king of hero

king of hero

作者: 等下流民 | 来源:发表于2019-11-27 18:11 被阅读0次
    
    import csv
    import random
    import re
    import time

    import requests
    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    
    # Scheme and host of the scraped site; relative schedule paths are appended to it.
    BASE_URL = "https://www.wanplus.com"
    # Event page (id 839) whose HTML is searched for links to schedule pages.
    EVENT_URL = "%s/event/839.html" % BASE_URL
    
    
    def find_schedule_pages():
        """Return the schedule-page paths referenced by the event page.

        Downloads ``EVENT_URL`` with ``requests`` and collects every substring
        shaped like ``/schedule/<digits>.html``.

        Returns:
            list[str]: Site-relative paths in order of appearance. A path that
            occurs several times in the page is returned several times.
        """
        text = requests.get(EVENT_URL).text
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        # The dot is escaped so only a literal ".html" suffix is accepted.
        return re.findall(r"/schedule/\d+\.html", text)
    
    
    def chrome_get_texts(url, min_wait_seconds=1):
        """Open *url* in Chrome and capture the page source after each list-item click.

        After loading the page, the items ``li[1]`` .. ``li[9]`` under
        ``//*[@id="info"]/div[2]/div[3]/ul`` are clicked one by one. After every
        click the function sleeps and then stores ``driver.page_source``. The
        loop stops at the first item that cannot be found or clicked.

        Args:
            url: Address of the page to load.
            min_wait_seconds: Lower bound of each pause; the actual pause is a
                random value between this and twice this many seconds.

        Returns:
            list[str]: One page-source snapshot per successful click (possibly
            empty).
        """
        print("chrome_get_texts %s" % url)
        texts = []
        chrome_options = webdriver.ChromeOptions()
        # Works around the "DevToolsActivePort file doesn't exist" startup error.
        chrome_options.add_argument('--no-sandbox')
        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.get(url)
            time.sleep(min_wait_seconds + min_wait_seconds * random.random())

            for i in range(1, 10):
                try:
                    driver.find_element_by_xpath('//*[@id="info"]/div[2]/div[3]/ul/li[%d]' % i).click()
                    time.sleep(min_wait_seconds + min_wait_seconds * random.random())
                    texts.append(driver.page_source)
                except Exception:
                    # Best effort: the first missing or unclickable item ends the
                    # scan. Catching Exception (not a bare except) lets Ctrl-C
                    # and SystemExit propagate.
                    break
        finally:
            # Always shut the browser down; otherwise every call leaves a Chrome
            # process behind.
            driver.quit()
        return texts
    
    
    def get_kings_infos(text):
        """Extract team names, the result and four image-alt lists from one page.

        The HTML is parsed with BeautifulSoup (``lxml`` parser) and read from
        ``div`` elements selected by CSS class:

        * ``led_left`` / ``sna_right``: home / away team text.
        * ``las_midd``: result text. If it contains the home team name it is
          replaced by that name, otherwise if it contains the away team name it
          is replaced by that one; otherwise the raw text is kept.
        * ``led_left2`` / ``sna_right2``: ``alt`` texts returned as home / away
          bans.
        * ``led_left1`` / ``sna_right1``: ``alt`` texts returned as home / away
          "kings" (presumably the picked heroes -- confirm against the page).

        Args:
            text: HTML source of a match page.

        Returns:
            tuple: ``(home_team, away_team, game_result, home_bans, away_bans,
            home_kings, away_kings)``; the first three are ``str``, the rest are
            ``list[str]``.

        Raises:
            AttributeError: If one of the expected ``div`` elements is missing
                (``soup.find`` returns ``None``).
            KeyError: If an ``img`` inside one of the list divs has no ``alt``.
        """
        soup = BeautifulSoup(text, 'lxml')

        def div_text(css_class):
            # attrs must be a dict; the former set literal {"class", name} only
            # worked because bs4 treats a non-dict as a list of class names.
            return soup.find("div", attrs={"class": css_class}).text.strip()

        def img_alts(css_class):
            container = soup.find("div", attrs={"class": css_class})
            return [img['alt'] for img in container.find_all('img')]

        home_team = div_text("led_left")
        game_result = div_text("las_midd")
        away_team = div_text("sna_right")

        # Collapse the result text to the name of the team it mentions.
        if home_team in game_result:
            game_result = home_team
        elif away_team in game_result:
            game_result = away_team

        home_bans = img_alts("led_left2")
        away_bans = img_alts("sna_right2")
        home_kings = img_alts("led_left1")
        away_kings = img_alts("sna_right1")
        return home_team, away_team, game_result, home_bans, away_bans, home_kings, away_kings
    
    def main():
        """Scrape every schedule page and write one CSV row per captured snapshot.

        For each path from ``find_schedule_pages()`` the page is opened with
        ``chrome_get_texts()``, and every returned snapshot is parsed with
        ``get_kings_infos()``. Each row holds home team, away team, result,
        then the home bans, away bans, home kings and away kings. Rows are
        printed and written to ``2019kings.csv`` as UTF-8 with ``\\n`` line
        endings.
        """
        # newline='' hands line-ending control to the csv writer.
        with open('2019kings.csv', 'w', encoding='utf-8', newline='') as f:
            # csv.writer quotes fields containing commas, quotes or newlines,
            # which a plain ','.join would let corrupt the row structure.
            writer = csv.writer(f, lineterminator='\n')
            for url in find_schedule_pages():
                schedule_url = "%s%s" % (BASE_URL, url)
                for text in chrome_get_texts(schedule_url):
                    (home_team, away_team, game_result,
                     home_bans, away_bans, home_kings, away_kings) = get_kings_infos(text)
                    items = [home_team, away_team, game_result] + home_bans + away_bans + home_kings + away_kings
                    print(items)
                    writer.writerow(items)
    
    
    # Run the scraper only when executed as a script, so importing this module
    # does not launch browsers or overwrite the CSV file.
    if __name__ == "__main__":
        main()
    

    相关文章

      网友评论

          本文标题:king of hero

          本文链接:https://www.haomeiwen.com/subject/llhmwctx.html