Location
// Fields of the Location entity (one administrative-division row).
// The spider test in this file populates code, name, level, createTime and parentId
// through the builder and reads id back via getId() after insert.

// Row identifier; read after insert and used as the parentId of child rows.
// NOTE(review): never set explicitly by the spider — presumably filled in by the mapper on insert; confirm.
private Long id;
// Division code; the spider stores the numeric value of the 6-character code string.
private Long code;
// Division name as scraped (text following the code).
private String name;
// Hierarchy depth as written by the spider: 1 = province, 2 = city, 3 = district.
private Integer level;
// Creation timestamp; bound to column "create_time" by the annotation.
@TableField("create_time")
private Date createTime;
// id of the parent row; the spider writes 0 for provinces. Bound to column "parent_id".
@TableField("parent_id")
private Long parentId;
HttpUtil
/**
 * Single client reused by every call. Each OkHttpClient owns its own connection pool
 * and dispatcher threads, so building one per request wastes resources and defeats
 * connection reuse.
 */
private static final OkHttpClient SHARED_CLIENT = new OkHttpClient.Builder()
        .connectTimeout(10, TimeUnit.SECONDS)
        .writeTimeout(10, TimeUnit.SECONDS)
        .readTimeout(10, TimeUnit.SECONDS)
        .build();

/**
 * Performs a blocking HTTP GET and returns the response body as a string.
 *
 * <p>The HTTP status code is not inspected: the body of an error response is
 * returned just like a successful one. The body is also written to the debug log.
 *
 * @param url absolute URL to fetch
 * @return the response body decoded to a string
 * @throws IOException if the request fails, times out (10 s connect/read/write),
 *                     or the response carries no body
 */
public static String get(String url) throws IOException {
    Request request = new Request.Builder()
            .url(url)
            .build();
    // try-with-resources guarantees the response (and its connection) is released
    // even when reading the body throws.
    try (Response response = SHARED_CLIENT.newCall(request).execute()) {
        if (response.body() == null) {
            throw new IOException("Empty response body for " + url);
        }
        String body = response.body().string();
        log.debug(body);
        return body;
    }
}
爬虫代码
/**
 * Scrapes the administrative-division code table from stats.gov.cn and stores it
 * as a three-level tree (province / city / district) through {@code locationMapper}.
 *
 * <p>Steps: fetch the index page, collect the links inside the
 * {@code center_list_contlist} container, download the first linked page, read every
 * {@code MsoNormal} paragraph as "6-digit code + name", classify codes by their
 * trailing zeros, then insert rows parent-first so children can reference the
 * parent's id.
 *
 * <p>NOTE(review): a district is only inserted while iterating a city whose first
 * four digits match it. Districts with no such city row in the scraped data are
 * printed but never inserted — confirm whether they should hang off the province.
 *
 * @throws IOException if either page cannot be downloaded
 */
@Test
public void locationSpider() throws IOException {
    String url = "http://www.stats.gov.cn/tjsj/tjbz/xzqhdm";
    Document indexPage = Jsoup.parse(HttpUtil.get(url));
    List<String> urls = new ArrayList<>();
    for (Element container : indexPage.getElementsByClass("center_list_contlist")) {
        for (Element link : container.select("a")) {
            String href = link.attr("href");
            if (href.isEmpty()) {
                // Anchor without an href: nothing to follow (substring(1) would throw).
                continue;
            }
            // Drop the first character of the relative href before appending it to the base URL.
            urls.add(url + href.substring(1));
        }
    }
    if (urls.isEmpty()) {
        throw new IllegalStateException("No code-table links found on " + url);
    }

    String areaStr = HttpUtil.get(urls.get(0));
    Map<String, String> map = new TreeMap<>();
    for (Element paragraph : Jsoup.parse(areaStr).getElementsByClass("MsoNormal")) {
        // Strip every kind of spacing, including non-breaking (U+00A0) and ideographic
        // (U+3000) spaces, which \s alone does not match. The original chained three
        // identical replaceAll(" ", "") calls, presumably meant for these characters.
        String s = paragraph.text().replaceAll("[\\s\\u00A0\\u3000]+", "");
        // Skip blank/heading paragraphs: a valid row starts with a 6-digit code.
        if (s.length() < 6 || !s.substring(0, 6).matches("\\d{6}")) {
            continue;
        }
        map.put(s.substring(0, 6), s.substring(6));
    }

    Map<String, String> province = new TreeMap<>();
    Map<String, String> city = new TreeMap<>();
    Map<String, String> district = new TreeMap<>();

    System.out.println("===========省份===========");
    for (Map.Entry<String, String> entry : map.entrySet()) {
        // Codes ending in "0000" are province level.
        if (entry.getKey().endsWith("0000")) {
            System.out.println(entry.getKey() + "->" + entry.getValue());
            province.put(entry.getKey(), entry.getValue());
        }
    }
    System.out.println(province.size());

    System.out.println("===========市===========");
    for (Map.Entry<String, String> entry : map.entrySet()) {
        String key = entry.getKey();
        // Codes ending in "00" but not "0000" are city level.
        if (key.endsWith("00") && !key.endsWith("0000")) {
            System.out.println(key + "->" + entry.getValue());
            city.put(key, entry.getValue());
        }
    }

    System.out.println("===========区===========");
    for (Map.Entry<String, String> entry : map.entrySet()) {
        String key = entry.getKey();
        // Everything that is neither a province nor a city is treated as a district.
        if (!province.containsKey(key) && !city.containsKey(key)) {
            System.out.println(key + "->" + entry.getValue());
            district.put(key, entry.getValue());
        }
    }

    for (Map.Entry<String, String> provinceEntry : province.entrySet()) {
        String provincePrefix = provinceEntry.getKey().substring(0, 2);
        Location p = insertLocation(provinceEntry.getKey(), provinceEntry.getValue(), 1, 0L);
        for (Map.Entry<String, String> cityEntry : city.entrySet()) {
            if (!cityEntry.getKey().startsWith(provincePrefix)) {
                continue;
            }
            String cityPrefix = cityEntry.getKey().substring(0, 4);
            // NOTE(review): relies on insert() having populated p's id — confirm key backfill.
            Location c = insertLocation(cityEntry.getKey(), cityEntry.getValue(), 2, p.getId());
            for (Map.Entry<String, String> districtEntry : district.entrySet()) {
                if (districtEntry.getKey().startsWith(cityPrefix)) {
                    insertLocation(districtEntry.getKey(), districtEntry.getValue(), 3, c.getId());
                }
            }
        }
    }
}

/** Builds one Location row with the current time as createTime, inserts it, and returns it. */
private Location insertLocation(String code, String name, int level, Long parentId) {
    Location location = Location.builder()
            .code(Long.valueOf(code))
            .name(name)
            .createTime(new Date())
            .level(level)
            .parentId(parentId)
            .build();
    locationMapper.insert(location);
    return location;
}

网友评论