记录一些复用率比较高的代码片段
1、循环读取文件
/**
 * Lists the entries of a directory and hands each one to readFile.
 *
 * @param {string} path - Directory to scan. It is forwarded unchanged to
 *   readFile together with every entry name.
 */
function getFile(path) {
  const entries = fs.readdirSync(path);
  console.log(entries);
  entries.forEach((entry) => {
    readFile(entry, path);
  });
}
/**
 * Template showing two ways of consuming one file: as a JSON array and as
 * newline-separated text. Both loop bodies are placeholders.
 *
 * @param {string} file - File name; appended to `path` with no separator.
 * @param {string} path - Directory prefix (must already end with a separator).
 * @throws {SyntaxError} When the file content is not valid JSON.
 * @throws {TypeError} When the parsed JSON value has no forEach (not an array).
 */
function readFile(file, path) {
  const target = `${path}${file}`;
  const loadText = () => fs.readFileSync(target).toString();

  // Variant 1: the whole file is a JSON array.
  const records = JSON.parse(loadText());
  records.forEach((record) => {
    // do sth
  });

  // Variant 2: the file is handled line by line.
  const lines = loadText().trim().split('\n');
  lines.forEach((line) => {
    // do sth
  });
}
2、chrome 键盘选择插件 vimium
正则取html 标签内内容 (title 为标签名称) :[^><]+(?=<\/title>)
正则网站 :https://regexr.com/
3、例子 京东parse :
'use strict';
const fs = require('fs');
const moment = require('moment');
const querystring = require('querystring');
// The script takes exactly one CLI argument: the input directory.
if (process.argv.length !== 3) {
  ['node parse.js <fpath>', 'eg: node parse.js data/'].forEach((usageLine) => {
    console.log(usageLine);
  });
  process.exit(1);
}

// Input directory from the command line; file names are appended to it
// directly, so it has to end with a path separator (see the usage example).
const INPUT_DIR = process.argv[2];
const OUTPUT_DIR = 'result/';

// Output file name carries the current date; `header` is the CSV header row.
const resultFile = `jd_product_${moment().format('YYYY-MM-DD')}.csv`;
const header = ['id', 'skuId', '分类key', '分类value', '抓取时间'];
/**
 * Loads one captured-session file (a JSON array) from INPUT_DIR and passes
 * every entry that has a response body text on to parse().
 *
 * @param {string} file - File name inside INPUT_DIR.
 */
function readFile(file) {
  const raw = fs.readFileSync(`${INPUT_DIR}${file}`).toString();
  const sessions = JSON.parse(raw);

  for (const session of sessions) {
    const body = session.response.body;
    // Entries without a body, or without body text, carry nothing to parse.
    if (body == undefined || body.text == undefined) {
      continue;
    }
    // Digits between the last "/" and ".html" of the request line; with the
    // g flag this is an array of matches, or null when nothing matches.
    const id = session.request.header.firstLine.match(/(?<=\/)[0-9]*(?=\.html)/g);
    const time = session.times.requestBegin;
    parse(body.text, id, time);
  }
}
/**
 * Extracts the "colorSize" JSON array embedded in a page's text and appends
 * one CSV row per element to the result file. When the array is missing or
 * is not valid JSON, a single fallback row carrying the page <title> is
 * written instead.
 *
 * @param {string} res - Response body text.
 * @param {*} id - Product id as extracted by the caller; stringified into the row.
 * @param {*} time - Request time; written as the last cell of each row.
 */
function parse(res, id, time) {
  // Stringifies every cell, blanks out commas/whitespace so the cell cannot
  // break the CSV layout, then logs and appends the row.
  const writeRow = (cells) => {
    const row = cells.map((cell) => ('' + cell).replace(/[,\s]/g, ' '));
    console.log(row);
    fs.appendFileSync(OUTPUT_DIR + resultFile, row + '\n');
  };

  // Fallback row: id twice, a fixed label, the <title> text and the time.
  const writeTitleRow = () => {
    writeRow([id, id, '商品名', res.match(/[^><]+(?=<\/title>)/g), time]);
  };

  const colorSize = res.match(/(?<=colorSize\:).*\}\](?=\,)/g);
  if (colorSize === null) {
    writeTitleRow();
    return;
  }

  let skus;
  try {
    // The match array is coerced to a string by JSON.parse.
    skus = JSON.parse(colorSize);
  } catch (error) {
    writeTitleRow();
    return;
  }

  if (skus == undefined) {
    return;
  }

  skus.forEach((sku) => {
    // Column 2 is reserved for skuId; every other key/value pair is appended
    // in iteration order.
    const cells = [id, ''];
    for (const key in sku) {
      if (key === 'skuId') {
        cells[1] = sku[key];
      } else {
        cells.push(key, sku[key]);
      }
    }
    cells.push(time);
    writeRow(cells);
  });
}
/**
 * Creates (or truncates) the result CSV and writes its header row.
 *
 * The output directory is created first when it is missing; without that,
 * writeFileSync throws ENOENT on a fresh checkout. The file starts with a
 * "\ufeff" byte-order mark followed by the comma-joined `header` array.
 */
function initFile() {
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
  fs.writeFileSync(OUTPUT_DIR + resultFile, `\ufeff${header}\n`);
}
/**
 * Entry point: resets the result file, then parses every entry of INPUT_DIR
 * in the order readdirSync returns them.
 */
function main() {
  initFile();
  const inputFiles = fs.readdirSync(INPUT_DIR);
  console.log(inputFiles);
  inputFiles.forEach((name) => {
    readFile(name);
  });
  console.log('all done...');
}

main();
以上示例包含了:CSV 表头(header)、时间处理、正则、读文件
4、注入脚本
// Console-injected scraper: after a 500 ms delay, collects one tab-separated
// line per product tile under #J_goodsList and prints everything at once.
// Requires the page's jQuery to be available as `$`.
setTimeout(() => {
  let resultString = '';
  $('#J_goodsList li.gl-item').each(function () {
    const id = $(this).data('sku');
    const price = $('.p-price strong>i', this).text();

    // Every lookup is scoped to the current tile (`this`). An unscoped
    // '.p-price>span' selector would read the title of the first tile on the
    // page for every row.
    const plusSpan = $('.p-price>span', this);
    // attr() yields undefined when the attribute is absent; fall back to ''
    // instead of calling a method on undefined.
    const plusTitle = plusSpan.length > 0 ? String(plusSpan.attr('title') || '') : '';
    let vipPrice = '无PLUS会员价格';
    if (plusTitle.includes('PLUS会员')) {
      const priceMatch = plusSpan.find('em').text().toString().match(/\d+\.?\d*/);
      vipPrice = priceMatch ? priceMatch[0] : '';
    }

    const name = $('.p-name > a em', this).text();
    const shopName = $('.p-shop a', this).text();
    const comment = $('.p-commit a', this).text().toString();

    // Only string cells are cleaned: each whitespace character (and each
    // literal '+') becomes a space so a cell cannot break the row layout.
    const result = [id, name, price, vipPrice, shopName, comment].map((cell) =>
      typeof cell === 'string' ? cell.replace(/[\n\r\s+]/g, ' ') : cell
    );
    resultString = resultString.concat(result.join('\t').concat('\n'));
  });
  console.log(resultString);
}, 500);
5、node 开内存
Windows 下目录位置
C:\Users\39716(用户名)\AppData\Roaming\npm
找到内存不足发生的环境比如 webpack
打开webpack.cmd 添加 --max_old_space_size=4096
@REM webpack.cmd launcher shim: starts webpack with --max_old_space_size=4096.
@REM The flag is passed in BOTH branches so it also takes effect when a
@REM node.exe sitting next to this shim is used instead of the one on PATH.
@IF EXIST "%~dp0\node.exe" (
"%~dp0\node.exe" --max_old_space_size=4096 "%~dp0\node_modules\webpack\bin\webpack.js" %*
) ELSE (
@SETLOCAL
@SET PATHEXT=%PATHEXT:;.JS;=;%
node --max_old_space_size=4096 "%~dp0\node_modules\webpack\bin\webpack.js" %*
)
或者在执行node命令时 直接携带参数
node --max_old_space_size=4096 server.js
6、filter 自由组合:
/**
 * Builds every combination that picks one element from each list, in order
 * (a cartesian product), e.g. [[1, 2], ['a']] -> [[1, 'a'], [2, 'a']].
 *
 * Notes on the exact contract:
 *  - With a single list the list itself is returned (elements are not
 *    wrapped); with no list at all the result is undefined.
 *  - Elements of the first list that are already arrays are treated as
 *    partial combinations and extended in place of being wrapped.
 *  - Elements are appended with concat(), so an array element of a later
 *    list is flattened one level into the combination.
 *
 * @param {Array<Array>} arr - The option lists.
 * @returns {Array|undefined} All combinations, first list varying slowest.
 */
function generateCombinations(arr) {
  // Running product of the lists processed so far.
  let combos = arr[0];

  for (let listIdx = 1; listIdx < arr.length; listIdx++) {
    const options = arr[listIdx];
    const comboCount = combos.length;
    const optionCount = options.length;
    const expanded = [];

    for (let c = 0; c < comboCount; c++) {
      const current = combos[c];
      const prefix = current instanceof Array ? current : [current];
      for (let o = 0; o < optionCount; o++) {
        expanded.push(prefix.concat(options[o]));
      }
    }
    combos = expanded;
  }

  return combos;
}
7、遍历json
// Walks province -> city -> shop and queues one category-list request per
// shop. The request body is keyed by the city's id; the shop itself only
// travels along in the `gene` payload handed to the callback.
for (const prov in cityJson) { // key: province name
  const cities = cityJson[prov]; // array of cities in this province
  cities.forEach((city) => {
    const { cityId, cityName, shops } = city;
    shops.forEach((shopInfo) => {
      const reqBodyJson = { adminShopId: cityId, bizType: 0 };
      this.crawler.queue({
        uri: `https://appwechat.shixh.com/product/getCategoryList`,
        method: 'POST',
        body: JSON.stringify(reqBodyJson),
        gene: {
          shopInfo,
          provName: prov,
          cityName,
          cityId,
          page: 1
        },
        callback: this.categories.bind(this)
      });
    });
  });
}
8、文件夹创建鲁棒性代码
// Make sure the result and log directories exist before anything is written.
// `recursive: true` creates missing parent directories as well and does not
// fail when the directory is already there, so no separate existsSync check
// (with its check-then-create race) is needed.
fs.mkdirSync(this.resultDir, { recursive: true });
fs.mkdirSync(this.logDir, { recursive: true });
// Start the result file with a "\ufeff" byte-order mark and the header row.
fs.writeFileSync(this.resultDir + this.resultFile, `\ufeff${this.header}\n`);
logger.info('init done ...');
9、拆解 Charles 抓包信息
'use strict';
const fs = require('fs');
const moment = require('moment');
// The script takes exactly one CLI argument: the input directory.
if (process.argv.length !== 3) {
  ['node parse.js <fpath>', 'eg: node parse.js /data/'].forEach((usageLine) => {
    console.log(usageLine);
  });
  process.exit(1);
}

// Input directory from the command line; file names are appended to it
// directly, so it has to end with a path separator (see the usage example).
const INPUT_DIR = process.argv[2];
const OUTPUT_DIR = 'result/';

// Output file name carries the current date. No header array is declared
// here; it only existed as a commented-out line.
const resultFile = `shengxianchuanqi_hefei_shop_${moment().format('YYYY-MM-DD')}.csv`;
/**
 * Reads one JSON file from INPUT_DIR and passes the parsed document, as a
 * whole, to parse().
 *
 * An older variant (kept only as commented-out code in the original) looped
 * over the entries and parsed the response of each one whose status was
 * 'COMPLETE'; that path is not active.
 *
 * @param {string} file - File name inside INPUT_DIR.
 */
function readFile(file) {
  const text = fs.readFileSync(`${INPUT_DIR}${file}`).toString();
  const document = JSON.parse(text);
  parse(document);
}
/**
 * Appends one CSV row per shop found in `json.data` to the result file.
 * Does nothing when `json` is null/undefined or has no non-null `data`.
 *
 * @param {Object} json - Parsed document; `json.data` is iterated with forEach.
 * @param {string} [fileName] - Unused.
 */
function parse(json, fileName) {
  if (json == null) {
    return;
  }
  if (!('data' in json) || json.data == null) {
    return;
  }

  // Stringify a value and blank out commas/whitespace so it stays one cell.
  const toCell = (value) => ('' + value).replace(/[,\s]/g, ' ');

  json.data.forEach((shopObj) => {
    // open_time gets "000" appended before conversion — presumably a
    // seconds-to-milliseconds step; confirm against the data source.
    const openDate = moment(Number(shopObj.open_time + '000')).format('YYYY-MM-DD');
    const row = [
      shopObj.store_id,
      shopObj.store_desc,
      openDate,
      shopObj.address_province,
      shopObj.address_city,
      shopObj.address_district,
      shopObj.address_street,
      shopObj.start_time,
      shopObj.end_time,
      shopObj.district
    ].map((value) => toCell(value));

    fs.appendFileSync(OUTPUT_DIR + resultFile, row + '\n');
  });
}
/**
 * Creates (or truncates) the result CSV and writes its header row.
 *
 * The output directory is created first when it is missing; without that,
 * writeFileSync throws ENOENT on a fresh checkout. The file starts with a
 * "\ufeff" byte-order mark followed by the fixed header line.
 */
function initFile() {
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
  fs.writeFileSync(OUTPUT_DIR + resultFile, `\ufeff商店id,商店名称,开店时间,省份,城市,区,街道,营业时间,休息时间,邮编\n`);
}
/**
 * Entry point: writes a fresh result file, then feeds every entry of
 * INPUT_DIR to readFile in directory-listing order.
 */
function main() {
  initFile();
  const names = fs.readdirSync(INPUT_DIR);
  console.log(names);
  names.forEach((name) => {
    readFile(name);
  });
  console.log('all done...');
}

main();
10、nodejs 操作 excel mysql 等
'use strict';
/**
* 作用说明: 从文件夹中读取APP文件, 导入到流量数据库app中。
* 操作步骤:
*
* 1. 读取所有文件
* 2. 数据单位转换
* 3. 增量更新 Mysql数据库
*
* 注: mysql 的 Upsert 可以配合数据库的 **组合唯一索引** 来确保对固有的数据进行更新操作
*
* > ALTER TABLE app_test ADD UNIQUE KEY aurora_unique_keys (app_name, starting_date, source);
*
*/
const fs = require('fs');
const mysql = require('mysql');
const Excel = require('exceljs');
const moment = require('moment');
const logger = require('bda-util/winston-rotate-local-timezone')
.getLogger('../log/aurora_importor' + moment().format('YYYY-MM-DD') + '.log');
// Exactly one CLI argument (the data directory) is required. A usage error
// exits with a non-zero status so that shells and schedulers can detect it.
if (process.argv.length !== 3) {
  logger.error('Usage: node 001.import.js <- data dir path ->');
  process.exit(1);
}
/**
 * Reads the Excel workbooks whose file name contains "_基础数据_" from a
 * directory, writes the number of data rows of each relevant sheet to a CSV
 * audit file, and prepares a bulk upsert of the monthly figures into the
 * MySQL table `app`.
 *
 * Database writes are opt-in: by default (dry run) insert() only builds the
 * rows and resolves with a summary, so nothing is sent to MySQL.
 */
class Importor {
  /**
   * @param {string} dataDir - Directory holding the workbooks. File names are
   *   appended to it directly, so it must end with a path separator.
   * @param {Object} [options]
   * @param {boolean} [options.dryRun=true] - When true, insert() never runs
   *   the SQL statement; pass false to execute the upsert.
   */
  constructor(dataDir, { dryRun = true } = {}) {
    this.dataDir = dataDir;
    this.dryRun = dryRun;
    // NOTE(review): host and credentials are hard-coded in source; they
    // should be moved to configuration/environment and rotated.
    this.conn = mysql.createConnection({
      host: '192.168.99.169',
      user: 'root',
      password: 'Mike442144',
      database: 'bdadata'
    });
    // Audit CSV: one line per processed sheet (file name, sheet name, rows).
    this.writeStream = fs.createWriteStream('../result/' + 'jiguang_' + moment().format('YYYY-MM-DD') + '.csv');
    this.header = `\ufeff${['file_name','sheet_name','row_num'].join(',')}\n`;
    this.writeStream.write(this.header);
  }

  /**
   * Lists the last day of every month from the month of `s` to the month of
   * `e`, inclusive.
   *
   * @param {*} s - Start date, anything moment() accepts.
   * @param {*} e - End date, anything moment() accepts.
   * @returns {string[]} Dates formatted as YYYY-MM-DD.
   */
  _genEndDayOfMonth(s, e) {
    const end = moment(e).endOf('month');
    const ret = [];
    // add() mutates the moment in place, so `date` is the single cursor.
    for (let date = moment(s).endOf('month'); date <= end; date = date.add(1, 'M').endOf('month')) {
      ret.push(date.format('YYYY-MM-DD'));
    }
    return ret;
  }

  /**
   * Converts the collected month rows into `app` table rows and upserts them.
   *
   * @param {{name: string, month: Object[], day: Object[]}} datas - Rows
   *   collected by readFile(); every month row needs a string '日期' whose
   *   first six characters are YYYYMM.
   * @returns {Promise<string>} Always a Promise (callers chain .then on it):
   *   resolves with a summary line, rejects with the MySQL error.
   */
  insert(datas) {
    const insert_sql = `INSERT INTO app
(app_id, app_name, starting_date, daily_active_user, monthly_active_user,total_startup_counts, whole_network_penetration, total_use_time, user_retention ,source, app_type, createddate)
VALUES ? ON DUPLICATE KEY UPDATE
daily_active_user = VALUES(daily_active_user),
monthly_active_user = VALUES(monthly_active_user),
total_startup_counts = VALUES(total_startup_counts),
whole_network_penetration = VALUES(whole_network_penetration),
total_use_time = VALUES(total_use_time),
user_retention = VALUES(user_retention),
createddate = VALUES(createddate);`;

    // Index the day rows by date once instead of filtering the whole list for
    // every month. The first row seen for a date wins.
    const penetrationByDate = new Map();
    datas.day.forEach(it => {
      const day = moment(it['日期']).format('YYYY-MM-DD');
      if (!penetrationByDate.has(day)) {
        penetrationByDate.set(day, it['全网渗透率']);
      }
    });

    const createdDate = moment().format('YYYY-MM-DD');
    const records = datas.month.map(val => {
      // 'YYYYMM...' -> last day of that month. Only characters 4-5 are the
      // month; taking more would produce an unparsable date for longer values.
      const yearMonth = val['日期'];
      const date = moment(yearMonth.slice(0, 4) + '-' + yearMonth.slice(4, 6)).endOf('month').format('YYYY-MM-DD');
      // Penetration is taken from the day row dated on the month's last day.
      const penetration = penetrationByDate.has(date) ? penetrationByDate.get(date) : null;
      const dau = val['月均DAU(所有平台)'] / 1000;
      const mau = val['MAU(所有平台)'] / 1000;
      const sessions = val['月启动次数'] / 1000;
      const ts = val['月使用时长(万小时)'] * 0.6; // mm(minutes) per the original note — TODO confirm unit
      // Missing retention must stay NULL: isNaN(null) is false, so null has
      // to be checked explicitly or it would be stored as 0.
      const rawRetention = val['三十天用户留存'];
      const retention = rawRetention == null || Number.isNaN(Number(rawRetention)) ? null : rawRetention * 100;
      // The target table mixes many data sources, so every row written here
      // carries fixed markers: source 'aurora', app_type 'mobile', and
      // today's date as createddate.
      return [datas.name, datas.name, date, dau, mau, sessions, penetration, ts, retention, 'aurora', 'mobile', createdDate];
    });

    if (this.dryRun) {
      return Promise.resolve(`AppName:${datas.name}, dry run: ${records.length} row(s) prepared, database untouched`);
    }
    if (records.length === 0) {
      // "VALUES ?" with an empty list is not valid SQL; nothing to do.
      return Promise.resolve(`AppName:${datas.name}, Upserted: 0 个`);
    }
    return new Promise((resolve, reject) => {
      this.conn.query(insert_sql, [records], (err, results) => {
        if (err) {
          reject(err);
          return;
        }
        resolve(`AppName:${datas.name}, Upserted: ${results.affectedRows} 个`);
      });
    });
  }

  /**
   * Collects the data rows of one worksheet as objects keyed by header text.
   * Rows 1 and 2 are header rows; the values of row 2 become the keys.
   *
   * @param {Object} worksheet - exceljs worksheet.
   * @param {Object} baseFields - Properties copied into every row object.
   * @returns {Object[]} One object per row after the header rows.
   */
  _collectRows(worksheet, baseFields) {
    let header = [];
    const rows = [];
    worksheet.eachRow({ includeEmpty: true }, (row, rowNumber) => {
      if (rowNumber <= 2) {
        header = row.values;
        return;
      }
      const values = row.values;
      const data = Object.assign({}, baseFields);
      header.forEach((title, idx) => {
        data[title] = values[idx] || null;
      });
      rows.push(data);
    });
    return rows;
  }

  /** Logs one audit line (app name, sheet name, row count) and writes it to the CSV. */
  _reportSheet(name, worksheet, count) {
    const result = [name, worksheet.name, count];
    logger.info(result);
    this.writeStream.write(`${result}\n`);
  }

  /**
   * Reads one workbook, audits its three relevant sheets, merges the 30-day
   * retention into the month rows and hands everything to insert().
   * Failures are logged together with the file name; nothing is thrown.
   *
   * @param {string} file - Workbook file name inside dataDir; the app name is
   *   the part before "_基础数据_".
   */
  readFile(file) {
    const nameMatch = file.match(/^(.*?)_基础数据_/);
    if (!nameMatch) {
      logger.error(file, 'file name does not contain "_基础数据_", skipped');
      return;
    }
    const name = nameMatch[1];
    const workbook = new Excel.Workbook();
    workbook.xlsx.readFile(this.dataDir + file).then(() => {
      logger.info('readding file: ', file);
      const records = {
        name: name,
        month: [],
        day: []
      };
      let retentionRows = [];

      workbook.eachSheet((worksheet) => {
        if (worksheet.name === '基础月指标') {
          const rows = this._collectRows(worksheet, {});
          records.month = records.month.concat(rows);
          this._reportSheet(name, worksheet, rows.length);
        } else if (worksheet.name === '基础日指标') {
          const rows = this._collectRows(worksheet, { name: name });
          // Day rows feed the penetration lookup in insert().
          records.day = records.day.concat(rows);
          this._reportSheet(name, worksheet, rows.length);
        } else if (worksheet.name === '活跃留存率') {
          const rows = this._collectRows(worksheet, { name: name });
          retentionRows = retentionRows.concat(rows);
          this._reportSheet(name, worksheet, rows.length);
        }
      });

      // Merge retention only after every sheet has been read, so the result
      // does not depend on the order of the sheets in the workbook, and skip
      // months that have no month row instead of failing the whole file.
      preprocess(retentionRows).forEach(info => {
        const monthRow = records.month.find(ele => ele['日期'] === info['日期']);
        if (!monthRow) {
          logger.info('no month row for retention of %s in %s, skipped', info['日期'], file);
          return;
        }
        monthRow['三十天用户留存'] = info['三十天用户留存'];
      });

      return this.insert(records).then(result => {
        logger.info(result);
      });
    }).catch(error => {
      logger.error(file, error);
    });
  }

  /**
   * Schedules readFile() for every matching workbook in dataDir, one file per
   * second, the first one two seconds from now.
   */
  readDir() {
    const files = fs.readdirSync(this.dataDir).filter(f => f.includes('_基础数据_'));
    files.forEach((file, index) => {
      setTimeout(() => this.readFile(file), (index + 2) * 1000);
    });
    logger.info('#file: %s from dictionary: %s', files.length, this.dataDir);
  }

  /** Starts the import. */
  start() {
    this.readDir();
  }
}
/**
 * Averages the '第三十天' value of the given rows per calendar month.
 * Rows whose value is null or not numeric are ignored.
 *
 * @param {Object[]} list - Rows with a '日期' (anything moment() accepts) and
 *   a '第三十天' value.
 * @returns {Object[]} One `{ '日期': 'YYYYMM', '三十天用户留存': mean }` object
 *   per month, in Object.keys order of the month keys.
 */
function preprocess(list) {
  // Plain object on purpose: its key ordering determines the output order.
  const samplesByMonth = {};

  for (const info of list) {
    const month = moment(info['日期']).endOf('month').format('YYYYMM');
    const raw = info['第三十天'];
    if (raw === null || isNaN(raw)) {
      continue;
    }
    const bucket = samplesByMonth[month] || (samplesByMonth[month] = []);
    bucket.push(parseFloat(raw));
  }

  return Object.entries(samplesByMonth).map(([month, samples]) => {
    const total = samples.reduce((acc, val) => acc + val, 0);
    return {
      '日期': month,
      '三十天用户留存': total / samples.length
    };
  });
}
// Bootstrap: import everything found in the directory given on the command line.
const dataDirArg = process.argv[2];
let instance = new Importor(dataDirArg);
instance.start();
网友评论