美文网首页Python四期爬虫作业
【Python爬虫】Beautiful Soup爬号码段

【Python爬虫】Beautiful Soup爬号码段

作者: d1b0f55d8efb | 来源:发表于2017-09-07 11:40 被阅读17次
    # coding: utf-8
    """Scrape mobile-number prefixes (号段) for every city listed on www.51hao.cc
    and append them as rows to 号段查询/haoduan.csv next to this script.

    Row format: city, district name, district URL, prefix, prefix-block text, number.
    """
    import requests, os, csv
    from bs4 import BeautifulSoup

    url = 'http://www.51hao.cc'
    req = requests.get(url)
    req.encoding = "gb2312"  # site is GB2312-encoded; requests would mis-guess otherwise
    soup = BeautifulSoup(req.text, 'lxml')

    # Compute the output location ONCE, not inside the innermost loop as before.
    base_dir = os.path.abspath(__file__)
    parent_dir = os.path.dirname(base_dir)
    menu_dir = os.path.join(parent_dir, "号段查询")
    # Bug fix: the original only created the dir AND wrote rows when the dir did
    # not yet exist, so every run after the first silently saved nothing.
    os.makedirs(menu_dir, exist_ok=True)
    file = os.path.join(menu_dir, 'haoduan.csv')

    fkts = soup.find_all("div", class_="fkt")
    # newline="" per the csv docs: prevents blank lines between rows on Windows.
    # Open once and keep appending; the original reopened the file per row.
    with open(file, "a", encoding="utf8", newline="") as fp:
        write = csv.writer(fp)
        for fkt in fkts:
            fkbj = fkt.find("div", class_="fkbj")
            if not fkbj:  # guard clause instead of nesting the whole body
                continue
            city = fkbj.a.text
            fklk = fkt.find("div", class_="fklk")
            for shi in fklk.find_all("a"):
                shi_ming = shi.text
                shi_url = shi["href"]

                req2 = requests.get(shi_url)
                req2.encoding = 'gb2312'
                soup2 = BeautifulSoup(req2.text, 'lxml')
                nums = soup2.find_all("div", class_="num_bg")
                uls = soup2.find_all("ul")
                # Pairs nums[i] with uls[i] — assumes the page interleaves one
                # <ul> per num_bg div; TODO confirm against the live markup.
                for haoduans, haoduanuls in zip(nums, uls):
                    haoduan = haoduans.find("span").text
                    haoduanul = haoduans.text
                    for li in haoduanuls.find_all("a"):
                        haoma = li.text
                        # Bug fix: csv.writer.writerow takes ONE sequence
                        # argument; the original passed six positional args,
                        # which raises TypeError on the first row.
                        write.writerow([city, shi_ming, shi_url,
                                        haoduan, haoduanul, haoma])
    

    相关文章

      网友评论

        本文标题:【Python爬虫】Beautiful Soup爬号码段

        本文链接:https://www.haomeiwen.com/subject/qbfijxtx.html