- 使用 line-reader 模块按行读取文件
并使用 iconv-lite 模块对读取到的字符串进行编码转换
const fs = require('fs');
const lineReader = require('line-reader');
const path = require('path');
const iconv = require("iconv-lite");
const co = require('co');
/**
* @method 读取小说并解析
* @param paths 文件地址
* @returns {Function}
*/
readerTXT(paths) {
return function (cb) {
co(function* () {
let count = 0; //章节数
let contents = [];//记录内容
let oldTitle = '简介';//记录当前标题
let content = [];//记录当前内容
//调用lineReader模块的eachLine按行读取接口,并设置读取字节码,方便字符编码转义
lineReader.eachLine(paths, {encoding: 'binary'}, function (line, last) {
//转义
let oldStr = new Buffer(line, 'binary');
//默认utf8
let str = iconv.decode(oldStr, 'utf8');
//判断是否是gbk
if (str.indexOf('�') !== -1) {
str = iconv.decode(oldStr, 'gb2312');
}
//判断是否按规则提取内容
let chapterTest = /(^\第)(.{1,9})[章](\s*)(.*)|(^\[前言序章完本])(\s*)(.*)/;
if (chapterTest.test(str)) {
//保存章节
contents.push({
title: oldTitle,
filename: count,
content: content
});
oldTitle = str;
content = [];
count++;
} else {
if (str && (str.indexOf("第") === -1 && str.indexOf("卷") === -1) && (str.indexOf("第") === -1 && str.indexOf("部") === -1) && str.indexOf("===") === -1 && (str.indexOf("更多") === -1 && str.indexOf("http") === -1)) {
content.push(str);
}
}
//判断是否到最后一行
if (last) {
contents.push({
title: oldTitle,
filename: count,
content: content
});
cb(null, contents);
}
});
}).catch(function (err) {
cb(new Error(err.message), null);
})
}
}
网友评论