// ===== File: index.js =====
const http = require('http')
const path = require('path')
const fs = require('fs')
const cheerio = require('cheerio')
const { getLocalDatasForLnglat, createDirSync } = require('./getLnglat')
/**
 * Downloads a page over HTTP, saves the raw HTML to disk, extracts the trimmed
 * text of every matching element into a JSON array, and then hands that JSON
 * file to getLocalDatasForLnglat.
 *
 * @param {object} options
 * @param {string} options.url - Address of the page to request.
 * @param {string} options.docParseDst - Base output path, resolved against
 *   __dirname. The HTML goes to `${docParseDst}.html` and the extracted texts
 *   to `${docParseDst}.json`.
 * @param {string} options.selectorPath - Selector of the container elements.
 * @param {string} options.findSelector - Selector searched inside the containers.
 * @param {string} options.lnglatDst - Destination path passed unchanged to
 *   getLocalDatasForLnglat.
 */
const getDataAndSave = ({ url, docParseDst, selectorPath, findSelector, lnglatDst }) => {
  const absoluteDocParseDst = path.resolve(__dirname, docParseDst)
  if (!createDirSync(path.dirname(absoluteDocParseDst))) {
    return
  }
  const aHtml = `${absoluteDocParseDst}.html`
  const aJson = `${absoluteDocParseDst}.json`

  // Reads the saved HTML back, extracts the texts and stores them as JSON.
  const parseSavedHtml = () => {
    console.log('开始解析')
    fs.readFile(aHtml, { encoding: 'utf-8' }, (readError, datas) => {
      if (readError) {
        console.log(`fs-err`, readError)
        return
      }
      const $ = cheerio.load(datas)
      const result = []
      $(selectorPath)
        .find(findSelector)
        .each((i, item) => {
          result[i] = $(item).text().trim()
        })
      if (result.length === 0) {
        console.log('parse-err', `no element matched "${selectorPath}" "${findSelector}"`)
        return
      }
      fs.writeFile(aJson, JSON.stringify(result), err => {
        if (err) {
          console.log(`fs-err`, err)
        } else {
          getLocalDatasForLnglat(aJson, lnglatDst)
        }
      })
    })
  }

  const writerStream = fs.createWriteStream(aHtml, { flags: 'w', autoClose: true })
  writerStream.on('error', e => {
    console.log(`fs-err`, e)
  })
  // Parse only once the file is completely flushed; starting on the request's
  // 'close' event could read a half-written file, and also ran after failures.
  writerStream.on('finish', parseSavedHtml)

  http
    .get(url, res => {
      if (res.statusCode < 200 || res.statusCode >= 300) {
        console.log('request-err', `unexpected status code ${res.statusCode}`)
        // Drain the body so the socket is released, and drop the output file stream.
        res.resume()
        writerStream.destroy()
        return
      }
      res.on('data', () => {
        console.log('请求')
      })
      res.on('error', e => {
        console.log('request-err', e.message)
        writerStream.destroy()
      })
      // pipe() ends the write stream when the response ends, which triggers 'finish'.
      res.pipe(writerStream)
    })
    .on('error', e => {
      console.log('request-err', e.message)
      // destroy() suppresses 'finish', so a failed request is never parsed.
      writerStream.destroy()
    })
}
// Script entry point: describe the scrape job, then run it.
const scrapeJob = {
  url: 'http://www.lg.gov.cn/zwfw/zdfw/yl/fwsm/yljg/sqjkfwzx/',
  docParseDst: './datas/社康',
  selectorPath: '.sk-box .sk-item',
  findSelector: 'a',
  lnglatDst: './parser/smt.lg.map.nav.json'
}
getDataAndSave(scrapeJob)
// ===== File: getLnglat.js =====
const http = require('https')
const path = require('path')
const fs = require('fs')
// Module-wide accumulator: getLnglatFromAmap pushes one { name, lat, lng }
// entry per geocoded address, and the whole array is serialized on each write.
const results = []
/**
 * Reads a local JSON file holding an array of addresses and requests a
 * geocode for every entry.
 *
 * Problems are reported through console.error and never thrown: a missing
 * file, an unreadable file, invalid JSON, or JSON that is not an array.
 *
 * @param {string} src - Path of the JSON file to read.
 * @param {string} dst - Destination path forwarded to getLnglatFromAmap.
 */
const getLocalDatasForLnglat = (src, dst) => {
  // A single readFile call replaces the earlier existsSync + readFile pair;
  // a missing file is recognised by its error code instead.
  fs.readFile(src, { encoding: 'utf-8' }, (err, datas) => {
    if (err) {
      if (err.code === 'ENOENT') {
        console.error('未找到对应路径的文件')
      } else {
        console.error('读取本地文件出错: ', err)
      }
      return
    }

    let parseDatas
    try {
      parseDatas = JSON.parse(datas)
    } catch (parseError) {
      // An exception escaping this callback would crash the process.
      console.error('读取本地文件出错: ', parseError)
      return
    }

    if (!Array.isArray(parseDatas)) {
      console.error('读取本地文件出错: ', new TypeError(`expected a JSON array in ${src}`))
      return
    }

    for (const addr of parseDatas) {
      getLnglatFromAmap(addr, dst)
    }
  })
}
// One debounced writer per destination path. Previously a single writer was
// stored on the module-level `this`, which tied every later call to the first
// `dst` it had seen and fails where `this` is undefined.
const amapWriteSchedulers = new Map()

/**
 * Asks the AMap geocoding endpoint for the coordinates of one address, appends
 * the answer to the shared `results` array and schedules a debounced write of
 * all results to `dst`.
 *
 * Failures are logged and skipped: a request error, an unparsable body, or a
 * response with no usable geocode.
 *
 * @param {string} addr - Address text to geocode.
 * @param {string} dst - File path that receives the serialized results.
 */
const getLnglatFromAmap = (addr, dst) => {
  // NOTE(review): the API key is hard-coded in source; consider moving it to
  // configuration. The address is escaped so '&', '#', spaces and the like
  // cannot corrupt the query string.
  const url = `https://restapi.amap.com/v3/geocode/geo?key=ead4b4ffc3093ac65bf76055625e47a6&s=rsv3&city=0755&address=${encodeURIComponent(addr)}`

  // `http` is bound to the https module in this file.
  http
    .get(url, res => {
      // The body may arrive in several chunks, so collect them all and parse
      // once at the end instead of parsing every chunk on its own.
      const chunks = []
      res.on('data', chunk => {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))
      })
      res.on('end', () => {
        let payload
        try {
          payload = JSON.parse(Buffer.concat(chunks).toString())
        } catch (parseError) {
          console.log('request-err', parseError.message)
          return
        }

        const first = payload && Array.isArray(payload.geocodes) ? payload.geocodes[0] : undefined
        if (!first || typeof first.location !== 'string' || typeof first.formatted_address !== 'string') {
          console.log('request-err', `no geocode result for ${addr}`)
          return
        }

        const [lng, lat] = first.location.split(',')
        results.push({
          name: first.formatted_address.replace(/^(广东省?)?(深圳市?)?/, ''),
          lat,
          lng
        })

        let scheduleWrite = amapWriteSchedulers.get(dst)
        if (!scheduleWrite) {
          scheduleWrite = debounce(() => writeToFile(dst, JSON.stringify(results)), 1000)
          amapWriteSchedulers.set(dst, scheduleWrite)
        }
        scheduleWrite()
      })
    })
    .on('error', e => {
      console.log('request-err', e.message)
    })
}
/**
 * Makes sure a directory exists, creating any missing parent directories.
 *
 * @param {string} pathName - Directory path to create.
 * @returns {boolean} Always true when the call returns normally.
 * @throws {Error} Whatever fs.mkdirSync throws when the directory cannot be
 *   created (for example a permission error).
 */
const createDirSync = pathName => {
  // Backward-compatible fast path: any existing path counts as success.
  if (fs.existsSync(pathName)) {
    return true
  }
  // The built-in recursive mode replaces the hand-written recursion. It does
  // not fail if another process creates the directory in between, and it
  // cannot recurse forever when the root of the path is missing.
  fs.mkdirSync(pathName, { recursive: true })
  return true
}
/**
 * Writes `content` to `dstPath` after making sure its parent directory exists.
 * The outcome of the write is reported through console.log.
 *
 * @param {string} dstPath - File to write.
 * @param {string} content - Data to store in the file.
 */
const writeToFile = (dstPath, content) => {
  const parentDir = path.dirname(dstPath)
  if (!createDirSync(parentDir)) {
    return
  }

  const reportOutcome = err => {
    if (!err) {
      console.log('地理编码成功')
      return
    }
    console.log(`写入本地报错: `, err)
  }

  fs.writeFile(dstPath, content, reportOutcome)
}
/**
 * Wraps `func` so that it runs only after `delay` milliseconds have passed
 * without another call. Each new call cancels the pending run and reschedules
 * it with the latest `this` value and arguments.
 *
 * @param {Function} func - Function to invoke after the quiet period.
 * @param {number} delay - Quiet period in milliseconds.
 * @returns {Function} The debounced wrapper.
 */
const debounce = (func, delay) => {
  let pendingId = null
  return function (...callArgs) {
    const context = this
    if (pendingId) {
      clearTimeout(pendingId)
    }
    const fire = () => {
      func.apply(context, callArgs)
    }
    pendingId = setTimeout(fire, delay)
  }
}
module.exports = {
createDirSync,
getLocalDatasForLnglat
}
// 网友评论 (reader-comments heading left over from the source web page; not part of the code)