美文网首页
cheerio爬取数据并通过excel-export将结果保存到Excel文件

cheerio爬取数据并通过excel-export将结果保存到Excel文件

作者: b59a2ae26f20 | 来源:发表于2019-07-23 16:16 被阅读0次
    const request = require('request');
    const cheerio = require('cheerio')
    const fs = require('fs');
    const { wf, excel, l } = require('./utils')
    
    /**
     * Builds the column definitions for one table from its `<th>` cells.
     *
     * @param {object} table - cheerio selection to search for `th` elements.
     * @returns {Array<{caption: string, type: string, width: string}>}
     *   One column descriptor per `th`, in document order; the caption is the
     *   header text with all whitespace removed.
     */
    const generateHeaderByTable = (table) => {
        const headerCells = table.find('th');
        // Header text may be wrapped or padded in the markup; strip every whitespace run.
        const toCaption = (cell) => cell.text().trim().replace(/\s*/g, '');
        return Array.from({ length: headerCells.length }, (_, index) => ({
            caption: toCaption(headerCells.eq(index)),
            type: 'string',
            width: '30',
        }));
    };
    
    /**
     * Collects the text of every `<td>` cell in a table as one flat row.
     *
     * @param {object} table - cheerio selection to search for `td` elements.
     * @returns {string[]} Cell texts in document order, with all whitespace removed.
     */
    const generateRowByTable = (table) => {
        const dataCells = table.find('td');
        // Cell text may be wrapped or padded in the markup; strip every whitespace run.
        const toValue = (cell) => cell.text().trim().replace(/\s*/g, '');
        return Array.from({ length: dataCells.length }, (_, index) => toValue(dataCells.eq(index)));
    };
    
    /**
     * Reads the saved HTML in ./d1.txt and hands its `.details` tables to the
     * `excel` helper (from ./utils) targeting ./index.xlsx.
     *
     * `.details` elements are consumed in pairs: elements i and i+1 together
     * form one output row. The header row is taken from the first pair only.
     * The work happens asynchronously inside the `fs.readFile` callback, so
     * this function itself returns nothing.
     */
    const getExcelData = () => {
        fs.readFile('./d1.txt', 'utf-8', (err, data) => {
            if (err) {
                // Without this guard `data` is undefined and would be passed on to cheerio.
                console.error('Failed to read ./d1.txt:', err);
                return;
            }
            // Declared locally: assigning to an undeclared `$` creates an implicit
            // global and throws a ReferenceError in strict mode / ES modules.
            const $ = cheerio.load(data);
            const details = $('.details');
            const titleArr = [
                ...generateHeaderByTable(details.eq(0)),
                ...generateHeaderByTable(details.eq(1)),
            ];
            const rowArr = [];
            // Step by two: each record spans two consecutive `.details` elements.
            for (let i = 0; i < details.length; i += 2) {
                rowArr.push([
                    ...generateRowByTable(details.eq(i)),
                    ...generateRowByTable(details.eq(i + 1)),
                ]);
            }
            excel('./index.xlsx', titleArr, rowArr);
        });
    };
    
    /**
     * Reads the saved HTML in ./d1.txt and passes a plain-text dump of its
     * table rows to the `wf` helper (from ./utils) targeting ./d2.txt.
     *
     * For every `<tr>` the first `th`/`td` pair is emitted as "header value",
     * followed by a line containing a single space. When the row has more than
     * one `th`, the second pair is emitted the same way. An extra blank line is
     * inserted before each row whose first header is '单位名称'.
     * The work happens asynchronously inside the `fs.readFile` callback.
     */
    function loadFile() {
        fs.readFile('./d1.txt', 'utf-8', (err, data) => {
            if (err) {
                // Without this guard `data` is undefined and would be passed on to cheerio.
                console.error('Failed to read ./d1.txt:', err);
                return;
            }
            // Declared locally: assigning to an undeclared `$` creates an implicit
            // global and throws a ReferenceError in strict mode / ES modules.
            const $ = cheerio.load(data);
            const trs = $('tr');
            // Formats the pair at `index` as "<header text> <cell text without whitespace>".
            const formatPair = (headers, cells, index) =>
                headers.eq(index).text().trim() + ' ' + cells.eq(index).text().trim().replace(/\s*/g, '');
            let str = '';
            // Index is named `rowIndex` so it does not shadow the `l` imported from ./utils.
            for (let rowIndex = 0; rowIndex < trs.length; rowIndex++) {
                const row = $(trs[rowIndex]);
                const headers = row.find('th');
                const cells = row.find('td');
                // '单位名称' marks the first row of a record; separate records with a blank line.
                if (headers.eq(0).text().trim() === '单位名称') {
                    str += '\n';
                }
                str += formatPair(headers, cells, 0);
                str += '\n \n';
                if (headers.length > 1) {
                    str += formatPair(headers, cells, 1);
                    str += '\n \n';
                }
            }
            wf('./d2.txt', str);
        });
    }
    // Script entry point: only the Excel export runs.
    // The plain-text dump (loadFile, which targets ./d2.txt) is disabled.
    // loadFile();
    getExcelData()
    // NOTE(review): getCompanyInfo is not defined in the visible part of this file;
    // presumably a leftover from an earlier fetching step — confirm before enabling.
    //getCompanyInfo('http://101.227.181.106/jsp/view/info.jsp?id=2092')
    

    相关文章

      网友评论

          本文标题:cheerio爬取数据并通过excel-export将结果保存到Excel文件

          本文链接:https://www.haomeiwen.com/subject/nvlalctx.html