美文网首页
Week1_Practice_Final

Week1_Practice_Final

作者: Mark狡 | 来源:发表于2016-05-22 18:10 被阅读0次

GetItemHref.py

from bs4 import BeautifulSoup
import requests

def get_Item_Href(url, timeout=10):
    """Collect item detail-page links from a listing page.

    Fetches ``url``, parses the response with the ``lxml`` parser and returns
    the ``href`` of every ``<a>`` whose ``class`` attribute is exactly ``t``
    and that is a direct child of a ``<td>``. Anchors whose ``data-addtype``
    attribute equals ``"level2"`` are skipped.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list with the ``href`` attribute value of each kept anchor. An
        entry is ``None`` when a matching anchor has no ``href`` attribute.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')
    anchors = soup.select('td > a[class="t"]')
    return [
        anchor.get('href')
        for anchor in anchors
        if anchor.get('data-addtype') != "level2"
    ]

if __name__ == "__main__":
    # Only hit the network when this module is run as a script; importing
    # get_Item_Href from another module must not trigger a request.
    get_Item_Href('http://bj.58.com/pbdn/0/')

GetItemInfo.py

import requests
from bs4 import BeautifulSoup
import time


def get_Item_Info(url, timeout=10):
    """Scrape the details of a single item page.

    Fetches ``url``, parses it with the ``lxml`` parser and extracts the
    category (last breadcrumb link), title, posting time and the first three
    summary ``<span>`` elements (price, quality, address).

    Args:
        url: Address of the item detail page.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A dict with the keys ``Item``, ``Title``, ``Time``, ``Price``,
        ``Quality`` (whitespace-stripped) and ``Add`` (a list of the stripped
        text fragments of the address element), or ``None`` when the page
        lacks the breadcrumb, the title, the time or three summary spans.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'lxml')

    # Pages without the breadcrumb are not regular item pages: skip them.
    breadcrumb = soup.select('#header > div.breadCrumb.f12 > span > a')
    if not breadcrumb:
        return None

    left_column = '#content > div.person_add_top.no_ident_top > div.per_ad_left'
    titles = soup.select(left_column + ' > div.col_sub.mainTitle > h1')
    times = soup.select('#index_show > ul.mtit_con_left.fl > li.time')
    # One query yields price, quality and address in that order.
    # ('sumary' is spelled this way in the selector the page is matched with.)
    summary_spans = soup.select(
        left_column + ' > div.col_sub.sumary > ul > li > div.su_con > span'
    )

    # Treat any other missing field like a missing breadcrumb instead of
    # failing with IndexError on an unexpected page layout.
    if not titles or not times or len(summary_spans) < 3:
        return None

    price, quality, address = summary_spans[:3]
    data = {
        "Item": breadcrumb[-1].get_text(),
        'Title': titles[0].get_text(),
        'Time': times[0].get_text(),
        'Price': price.get_text(),
        "Quality": quality.get_text().strip(),
        # stripped_strings is a generator; materialise it so the dict holds
        # the actual text rather than a generator object.
        "Add": list(address.stripped_strings),
    }
    # Pause for one second after each scraped page (presumably to avoid
    # hammering the site when called in a loop).
    time.sleep(1)
    return data



week1_final.py

from GetItemInfo import get_Item_Info
from GetItemHref import get_Item_Href

def main():
    """Scrape every item linked from the listing page and print the results.

    Collects the item links with ``get_Item_Href``, calls ``get_Item_Info``
    on each one, and prints the resulting list (one entry per link, holding
    whatever ``get_Item_Info`` returned for it).
    """
    listing_url = 'http://bj.58.com/pbdn/0/'
    item_urls = get_Item_Href(listing_url)
    # Distinct names keep the listing URL from being shadowed by the loop.
    item_data = [get_Item_Info(item_url) for item_url in item_urls]
    print(item_data)


if __name__ == "__main__":
    # Run the crawl only when executed as a script, not on import.
    main()

相关文章

网友评论

      本文标题:Week1_Practice_Final

      本文链接:https://www.haomeiwen.com/subject/owoorttx.html