# -*- coding: utf-8 -*-
import re
import requests
import time
import operator
import pymysql
from functools import reduce
import uuid
# Module-level counter, initialised to zero; nothing in the visible code updates it.
n = 0

# Path of the text file that fun_write_to_txt appends to.
save_route = 'E://China_Province_2019_test.txt'

# Single MySQL connection shared by every database helper in this script.
conn = pymysql.connect(
    host='localhost',
    user='root',
    passwd='root',
    database='province',
    charset='utf8',
)

# Random UUID string generated once when the module is loaded.
code = str(uuid.uuid4())
def fun_getName(result4):
    """Return the entries of *result4* whose second field contains no '0'.

    Args:
        result4: Iterable of indexable items; the script passes the
            ``(href, text)`` tuples produced by ``re.findall``. ``item[1]``
            must support the ``in`` operator.

    Returns:
        A new list holding the kept items in their original order.
    """
    # NOTE(review): presumably this drops the links whose text is a numeric
    # division code and keeps the ones showing a place name -- confirm
    # against the scraped pages.
    return [entry for entry in result4 if '0' not in entry[1]]
def fun_write_to_txt(address):
    """Append *address* to the ``save_route`` file as one ``---``-prefixed line.

    Args:
        address: Text to record; must be a ``str`` because it is concatenated
            with the prefix.
    """
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open(save_route, 'a', encoding='utf-8') as f:
        f.write("---" + address + '\n')
def fun_Insert_to_db(pkcode, value, fkcode, ntype):
    """Insert one row into ``tbprovince`` and commit it.

    Args:
        pkcode: Value for the ``pkcode`` column (the script passes a UUID string).
        value: Value for the ``sname`` column.
        fkcode: Value for the ``fkcode`` column (the script passes the parent
            row's pkcode, or an empty string for top-level rows).
        ntype: Value for the ``ntype`` column (the script passes "0", "1" or "2").

    Raises:
        Any exception raised by the shared ``conn`` while executing or
        committing the statement.
    """
    sql = "INSERT INTO tbprovince (pkcode,sname,fkcode, ntype) VALUES (%s,%s,%s,%s)"
    val = (pkcode, value, fkcode, ntype)
    # Use the cursor as a context manager so it is closed even if execute() fails.
    with conn.cursor() as cursor:
        cursor.execute(sql, val)
    conn.commit()
def fun_Query_from_db(value, fkcode):
    """Count the ``tbprovince`` rows named *value* under parent *fkcode*.

    Args:
        value: Name to match against the ``sname`` column (converted with ``str``).
        fkcode: Value to match against the ``fkcode`` column.

    Returns:
        The row count reported by ``cursor.execute`` -- 0 (falsy) when no row
        matches, so callers can use the result as an existence check.
    """
    # Values come from scraped HTML, so they are passed as query parameters
    # instead of being concatenated into the SQL text; a quote character in a
    # name can no longer break or alter the statement.
    sql = "select pkcode from tbprovince where sname=%s and fkcode = %s"
    params = (str(value), fkcode)
    with conn.cursor() as cursor:
        # mogrify() renders the exact statement that execute() will send,
        # which keeps the debug output of the final SQL.
        print(cursor.mogrify(sql, params))
        res = cursor.execute(sql, params)
    print(res)
    return res
def fun_Get_fkcode_db(value):
    """Return the ``fkcode`` of the first ``tbprovince`` row named *value*.

    Args:
        value: Name to match against the ``sname`` column (converted with ``str``).

    Returns:
        The ``fkcode`` column of the first matching row, or ``None`` when no
        row matches.
    """
    # Parameterized query: the name originates from scraped HTML and must not
    # be spliced into the SQL text.
    sql = "select fkcode from tbprovince where sname=%s"
    params = (str(value),)
    with conn.cursor() as cursor:
        print(cursor.mogrify(sql, params))
        if not cursor.execute(sql, params):
            # Nothing matched: report that explicitly instead of touching an
            # unassigned result variable.
            return None
        row = cursor.fetchone()
    print(row[0])
    return row[0]
def fun_Get_pkcode_db(value, fkcode):
    """Return the ``pkcode`` of the first row named *value* under parent *fkcode*.

    Args:
        value: Name to match against the ``sname`` column (converted with ``str``).
        fkcode: Value to match against the ``fkcode`` column.

    Returns:
        The ``pkcode`` column of the first matching row, or ``None`` when no
        row matches.
    """
    # Parameterized query: both values may originate from scraped HTML and
    # must not be spliced into the SQL text.
    sql = "select pkcode from tbprovince where sname=%s and fkcode = %s"
    params = (str(value), fkcode)
    with conn.cursor() as cursor:
        print(cursor.mogrify(sql, params))
        if not cursor.execute(sql, params):
            # Nothing matched: report that explicitly instead of touching an
            # unassigned result variable.
            return None
        row = cursor.fetchone()
    print(row[0])
    return row[0]
# areas = ['月湖区','余江区','贵溪市']
# for area in range(len(areas)):
# pkid = str(uuid.uuid4())
# fun_Insert_to_db(pkid,areas[area],"d28bb303-64aa-4abd-954c-258b2d0fe992","2")
# Empty module-level lists; the crawl below does not use them.
results2 = []
results3 = []
results4 = []
results5 = []
Dates1 = []

kv = {'user-agent': 'Mozilla/5.0'}
base_url = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/'
url = base_url + 'index.html'

# Regular expression capturing (href, link text) from single-quoted anchors.
# Compiled once here instead of once per downloaded page.
pattern = re.compile(r"<a href='(.*?)'>(.*?)<")

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the crawl forever.
_REQUEST_TIMEOUT = 30
# Seconds to pause before each sub-page request.
_REQUEST_DELAY = 3


def _fetch_links(page_url):
    """Download *page_url* and return its de-duplicated (href, text) link pairs."""
    response = requests.get(page_url, headers=kv, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    # set() removes duplicate links; the resulting order is arbitrary.
    return list(set(pattern.findall(response.text)))


def _get_or_create_code(name, parent_code, level, echo=False):
    """Return the pkcode of row (*name*, *parent_code*), inserting it if absent."""
    if fun_Query_from_db(name, parent_code):
        if echo:
            print(name)
        # A record already exists: reuse its pkcode.
        return fun_Get_pkcode_db(name, parent_code)
    # No record yet: insert one under a freshly generated key.
    new_code = str(uuid.uuid4())
    fun_Insert_to_db(new_code, str(name), parent_code, level)
    return new_code


# Collect the sub-page links from the index page (first-level entries).
result1 = _fetch_links(url)
print('result1')

for province_href, province_name in result1:
    # Best-effort crawl: a failure anywhere below is printed and the loop
    # moves on to the next first-level entry.
    try:
        province_code = _get_or_create_code(province_name, '', "0", echo=True)

        time.sleep(_REQUEST_DELAY)
        shinames = fun_getName(_fetch_links(base_url + province_href))
        for city_href, city_name in shinames:
            city_code = _get_or_create_code(city_name, province_code, "1")

            time.sleep(_REQUEST_DELAY)
            shinames2 = fun_getName(_fetch_links(base_url + city_href))
            for _county_href, county_name in shinames2:
                # Third-level pages are not downloaded; only their names are stored.
                _get_or_create_code(county_name, city_code, "2")
    except Exception as e:
        print(e)

print('well_done')
# [Image from the original article: 省市区.png (province / city / district)]
# 网友评论 (reader comments)