美文网首页
CSDN博客标题抓取

CSDN博客标题抓取

作者: 成功在于实践 | 来源:发表于2020-07-05 18:35 被阅读0次
    import json
    import time
    from selenium import webdriver
    class CsdnBlog():
        """Scrape post titles and links from a blog category page into a file.

        Despite the class name, the configured URL points at a cnblogs.com
        category page. The scraper drives a Chrome browser through Selenium,
        collects the anchor elements matched by a fixed XPath and appends the
        resulting ``{title: href}`` mapping to ``./123.json``.

        The instance owns two resources (the output file and the browser).
        Release them with :meth:`close`, or use the instance as a context
        manager::

            with CsdnBlog() as blog:
                blog.run()
        """

        def __init__(self):
            """Open the output file, start Chrome and load the category page.

            Raises:
                OSError: if the output file cannot be opened.
                Exception: whatever Selenium raises when the browser cannot
                    be started or the page cannot be loaded; the output file
                    is closed again before the exception propagates.
            """
            self.url = 'https://www.cnblogs.com/imyalost/category/1040462.html'
            # Titles are written with ensure_ascii=False, so the file must be
            # able to hold any Unicode text; do not rely on the locale default.
            self.file = open('./123.json', 'w', encoding='utf-8')
            try:
                self.driver = webdriver.Chrome()
                self.driver.get(self.url)
            except Exception:
                # Do not leak the file handle when the browser fails to start.
                self.file.close()
                raise

        def __enter__(self):
            """Return the scraper itself so it can be used in a ``with`` block."""
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            """Release the file and the browser; never suppress exceptions."""
            self.close()
            return False

        def close(self):
            """Close the output file and quit the browser.

            Safe to call more than once and on a partially initialised
            instance (attributes that are missing are simply skipped).
            """
            out_file = getattr(self, 'file', None)
            if out_file is not None and not out_file.closed:
                out_file.close()
            driver = getattr(self, 'driver', None)
            if driver is not None:
                # Forget the driver first so a second close() is a no-op.
                self.driver = None
                driver.quit()

        def parse_data(self):
            """Collect the post links from the currently loaded page.

            Returns:
                dict: maps each matched anchor's visible text to its ``href``.
                If two anchors share the same text, the later one wins.
            """
            # The string "xpath" is the value of selenium's By.XPATH; the
            # generic find_elements(by, value) call exists in Selenium 3 and 4,
            # whereas find_elements_by_xpath was removed in Selenium 4.3.
            links = self.driver.find_elements(
                'xpath', '//*[@id="mainContent"]/div/div/div/div[1]/a')
            return {link.text: link.get_attribute('href') for link in links}

        def save_data(self, data):
            """Append ``data`` to the output file as one JSON line.

            Args:
                data: any JSON-serialisable object (``run`` passes the dict
                    produced by :meth:`parse_data`).

            The line is terminated with ``",\\n"``, so the file holds
            comma-terminated JSON values rather than one strict JSON document.
            """
            line = json.dumps(data, ensure_ascii=False) + ',\n'
            self.file.write(line)
            # Flush so the data reaches the file even if close() is never called.
            self.file.flush()

        def run(self):
            """Load the page, scrape it and write the result to the file."""
            # The constructor already loaded the page once; loading it again
            # here means repeated run() calls scrape the current content.
            self.driver.get(self.url)
            data = self.parse_data()
            self.save_data(data)
    if __name__ == '__main__':
        # Script entry point: scrape once, then release what the scraper holds.
        csdn = CsdnBlog()
        try:
            csdn.run()
        finally:
            # Close the output file and shut the browser down even when
            # scraping fails; quit() runs even if closing the file raises.
            try:
                csdn.file.close()
            finally:
                csdn.driver.quit()

    相关文章

      网友评论

          本文标题:CSDN博客标题抓取

          本文链接:https://www.haomeiwen.com/subject/ezavqktx.html