美文网首页
杂类爬取香江百货 硬存数据库

杂类爬取香江百货 硬存数据库

作者: Meter_bulacn | 来源:发表于2019-03-12 09:10 被阅读0次

import requests
import re
import json
from lxml import etree
import urllib.parse
import urllib
import pymysql,random,time
# Shared MySQL connection and cursor; the scraper functions in this file
# write their rows through these module-level objects.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='bc123',
    db='leshop',
    charset='utf8',
)
cur = conn.cursor()

# HTTP headers sent with every page request.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}

def qingqiu(url):
    """Crawl the index page and process every category link found on it.

    Fetches ``url``, selects the ``<ul>`` elements matched by
    ``//*[@id="pcUL"]/dl[1]/dd/div/ul`` and, for every ``<a href>`` inside
    the ``.//li[1]/p`` nodes of each list, calls ``yuedxq`` with the link
    resolved to an absolute URL.

    Args:
        url: URL of the index page to start from.
    """
    response = requests.get(url, headers=header)
    response.encoding = 'utf-8'
    page = etree.HTML(response.text)

    for menu in page.xpath('//*[@id="pcUL"]/dl[1]/dd/div/ul'):
        for paragraph in menu.xpath('.//li[1]/p'):
            for href in paragraph.xpath('.//a/@href'):
                # Links may be relative; resolve them against the URL the
                # response actually came from (i.e. after any redirects).
                yuedxq(urllib.parse.urljoin(response.url, href))

def yuedxq(url):
    """Fetch a listing page and store its category name in MySQL.

    Downloads ``url``, reads the ``title`` attribute matched by
    ``//*[@id="5"]/a[2]/@title`` and inserts it as the ``name`` of a new
    ``goods_goodscategory`` row, stamped with today's local date, through
    the module-level ``cur``/``conn``. The insert is committed immediately.

    Pages on which the XPath matches nothing are skipped without touching
    the database.

    Args:
        url: Absolute URL of the listing page to fetch.
    """
    response = requests.get(url, headers=header)
    response.encoding = 'utf-8'
    page = etree.HTML(response.text)

    # etree.HTML may hand back None when the body has no parsable markup;
    # treat that the same as "no title found".
    titles = page.xpath('//*[@id="5"]/a[2]/@title') if page is not None else []
    if not titles:
        # Nothing to store. Without this guard the name variable was never
        # bound and the insert below failed with UnboundLocalError.
        return

    # When several nodes match, the last one wins (same as the old loop).
    category_name = titles[-1]

    sql = '''insert into goods_goodscategory(id,name,code,`desc`,category_type,is_tab,add_time,parent_category_id) values(0,%s,1,0,3,1,%s,1)'''
    add_time = time.strftime('%Y-%m-%d')  # current local date
    cur.execute(sql, (category_name, add_time))

    conn.commit()
 

if __name__ == "__main__":
    # Start the crawl from the site's index page when run as a script.
    qingqiu("http://xjbh.net/index.html")

相关文章

网友评论

      本文标题:杂类爬取香江百货 硬存数据库

      本文链接:https://www.haomeiwen.com/subject/rgowpqtx.html