mogujie.py(爬取数据)
import requests
import json
from mogijie9_db_helper import *
# Module-level database handles, created when this module is loaded.
# parse_page passes both to execute_sql2, and main closes db when it finishes.
db = get_connection()
cursor = get_cursor(db)
# Fetch the raw JSONP text for one page of search results
def get_one_page(page, timeout=10):
    """Download one page of the Mogujie clothing search listing.

    Args:
        page: Page number; it is converted with ``str`` and spliced into the
            ``page=`` query parameter.
        timeout: Seconds passed to ``requests.get`` so that a stalled server
            cannot block the crawler indefinitely.

    Returns:
        The response body decoded as UTF-8 when the server answers with
        status 200; ``None`` for any other status or when the request
        itself fails.
    """
    url = (
        "https://list.mogujie.com/search?callback=jQuery21108297191095165726_1554948240451&_version=8193&ratio=3%3A4&cKey=15&page="
        + str(page)
        + "&sort=pop&ad=2&fcid=50270&action=clothing&mt=12.848.r123121.3253"
    )
    headers = {
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Report the failure through the same "None" channel that a
        # non-200 status already uses, instead of an unhandled traceback.
        print('request for page %s failed: %s' % (page, exc))
        return None
    if response.status_code == 200:
        return response.content.decode('utf-8')
    return None
# Parse the JSONP payload and store every product it lists
def parse_page(html):
    """Unwrap a JSONP response, store its products and report the end flag.

    Args:
        html: Response text of the form ``callback({...})``, optionally
            followed by ``;`` and/or whitespace. ``None`` or an empty string
            is accepted and treated as "nothing more to crawl".

    Returns:
        The value of ``result.wall.isEnd`` from the decoded JSON, or ``True``
        when there is no text to parse.

    Raises:
        ValueError: If the text has no ``(...)`` wrapper or the wrapped
            content is not valid JSON.
        KeyError: If the decoded JSON lacks ``result``/``wall``/``isEnd``/``docs``.
    """
    if not html:
        # A failed download yields None; stop the crawl cleanly instead of
        # crashing on None.index.
        print('no page content to parse, stopping')
        return True
    # Slice between the first "(" and the last ")" so the result does not
    # depend on the response ending in exactly ");".
    start = html.index('(')
    end = html.rindex(')')
    json_data = json.loads(html[start + 1:end])
    is_end = json_data['result']['wall']['isEnd']
    products = json_data['result']['wall']['docs']
    print(len(products))
    for product in products:
        execute_sql2(db, cursor, product)
    return is_end
def main():
    """Crawl result pages one after another until the site reports the end.

    Each page is fetched with get_one_page and handed to parse_page exactly
    once. The loop stops when parse_page returns a true end flag or when a
    page could not be downloaded. The shared database connection is closed
    on the way out, even if an exception interrupts the crawl.
    """
    page = 1
    try:
        while True:
            html = get_one_page(page)
            print('*'*20)
            print(page)
            page += 1
            if html is None:
                # get_one_page returns None for a failed request; there is
                # nothing to parse, so stop instead of crashing.
                print('request failed, stopping')
                break
            # Parse each page only once: a second parse_page call on the
            # same text would insert every product of that page twice.
            is_end = parse_page(html)
            if is_end:
                print('爬取结束')
                break
    finally:
        close_connection(db)


if __name__ == '__main__':
    main()
/SQL/mogujie.sql(搭建数据库)
-- Schema for the crawler: one database holding a single product table.
CREATE DATABASE mogujie9 DEFAULT CHARACTER SET = utf8;
USE mogujie9;

-- One row per stored product; every scraped field is kept as text.
CREATE TABLE product (
    id            INTEGER PRIMARY KEY AUTO_INCREMENT,  -- surrogate row id
    trade_item_id VARCHAR(32),
    img           VARCHAR(1024),
    link          VARCHAR(1024),
    title         VARCHAR(512),
    org_price     VARCHAR(32),
    price         VARCHAR(32)
);
mogijie9_db_helper.py(连接数据库储存数据)
import pymysql
# Open the database connection
def get_connection():
    """Connect to the ``mogujie9`` MySQL database on the local machine.

    Returns:
        The connection object returned by ``pymysql.connect``.
    """
    # NOTE(review): the credentials are hard-coded in source; consider moving
    # them to configuration or environment variables before sharing this file.
    host = '127.0.0.1'
    port = 3306
    user = 'root'
    password = 'DENG5rong2hua0!'
    database = 'mogujie9'
    # Pass everything by keyword: PyMySQL 1.0 and later no longer accept
    # positional connection arguments.
    db = pymysql.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        database=database,
        charset='utf8',
    )
    return db
# Obtain a cursor from the database connection
def get_cursor(db):
    """Return a new cursor created by calling ``db.cursor()``."""
    return db.cursor()
# Close the database connection
def close_connection(db):
    """Close the given connection by calling ``db.close()``; returns nothing."""
    db.close()
# Insert one movie record with a parameterized statement
def execute_sql(db, cursor, item_dict):
    """Insert one row into the ``movie`` table and commit.

    The values are handed to ``cursor.execute`` as parameters rather than
    being formatted into the SQL text, so quotes or other special characters
    in the scraped data cannot break or alter the statement.

    Args:
        db: Connection object; ``commit()`` is called on it after the insert.
        cursor: Cursor used to execute the statement.
        item_dict: Mapping with the keys ``movie_name``, ``actor``,
            ``releasetime``, ``cover_url``, ``score``, ``rank`` and
            ``detail_url``.

    Raises:
        KeyError: If ``item_dict`` lacks one of the keys listed above.
    """
    sql = (
        'insert into movie '
        '(movie_name, actor, releasetime, cover_url, score, ranks, detail_url) '
        'values (%s, %s, %s, %s, %s, %s, %s)'
    )
    params = (
        item_dict['movie_name'],
        item_dict['actor'],
        item_dict['releasetime'],
        item_dict['cover_url'],
        item_dict['score'],
        item_dict['rank'],  # stored in the column named "ranks"
        item_dict['detail_url'],
    )
    print(sql)
    cursor.execute(sql, params)
    db.commit()
# Insert one product record
def execute_sql2(db, cursor, item_dict):
    """Insert one row into the ``product`` table and commit.

    Args:
        db: Connection object; ``commit()`` is called on it after the insert.
        cursor: Cursor used to execute the parameterized statement.
        item_dict: Mapping with the keys ``tradeItemId``, ``img``, ``link``,
            ``title``, ``orgPrice`` and ``price``.
    """
    statement = 'insert into product (trade_item_id, img, link, title, org_price, price) values (%s, %s, %s, %s, %s, %s)'
    print(statement)
    # Source keys listed in the same order as the columns in the statement.
    source_keys = ('tradeItemId', 'img', 'link', 'title', 'orgPrice', 'price')
    row = tuple(item_dict[key] for key in source_keys)
    cursor.execute(statement, row)
    db.commit()
网友评论