美文网首页DevOps
SpringBoot框架(43):POI高效导出百万级Excel

SpringBoot框架(43):POI高效导出百万级Excel

作者: 奇点一氪 | 来源:发表于2019-06-25 14:32 被阅读0次

    Excel简介
    什么是excel就不用介绍了,这里主要说明不同版本下每个sheet下的行列限制。

    版本区间 行数 列数 生成文件后缀


    image.png

    由上面可知 Excel 2003及以下是无法实现单sheet百万级的数据。

    Apache POI

    • 简介
      Apache POI 是用Java编写的免费开源的跨平台的 Java API,Apache POI提供API给Java程式对Microsoft Office(Excel、WORD、PowerPoint、Visio等)格式档案读和写的功能。POI为“Poor Obfuscation Implementation”的首字母缩写,意为“可怜的模糊实现”。

    • 常用类
      HSSF - 提供读写Microsoft Excel XLS格式档案的功能。
      XSSF - 提供读写Microsoft Excel OOXML XLSX格式档案的功能。
      SXSSF - 一种基于XSSF的低内存占用的API(3.8版本开始出现)。
      HWPF - 提供读写Microsoft Word DOC97格式档案的功能。
      XWPF - 提供读写Microsoft Word DOC2003格式档案的功能。
      HSLF - 提供读写Microsoft PowerPoint格式档案的功能。
      HDGF - 提供读Microsoft Visio格式档案的功能。
      HPBF - 提供读Microsoft Publisher格式档案的功能。
      HSMF - 提供读Microsoft Outlook格式档案的功能。
      我们这里是导出Excel,所以使用的是前三个。

    导出策略

    • 方案
      使用XSSF和SXSSF分别导入1w,10w,100w数据
      使用SXSSF,SXSSF以10w分页,SXSSF多线程以10w分页导入100w数据
    • 性能对比
      时间不包含网络耗时
      图片描述


      image.png

    总结

    • 方案一:
      数据在万条时XSSF和SXSSF相差不大
      数据上十万后SXSSF性能开始突出
      数据到达百万时,XSSF已不适合使用

    • 方案二:
      不进行分表时,SXSSF最多可存储1048576行
      百万级数据分表存储时,使用多线程导出几乎是不使用多线程导出的一半时间

    最终我得出一个导出百万级数据的最高效方案:多线程分表导出
    实战
    新建一个Maven项目。
    导入依赖
    1:poi
    2:poi-ooxml
    3:poi-ooxml-schemas
    4:dom4j
    5:xmlbeans

    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
            <dependency>
                <groupId>org.apache.poi</groupId>
                <artifactId>poi</artifactId>
                <version>4.1.0</version>
            </dependency>
    
            <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
            <dependency>
                <groupId>org.apache.poi</groupId>
                <artifactId>poi-ooxml</artifactId>
                <version>4.1.0</version>
            </dependency>
    
            <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
            <dependency>
                <groupId>org.apache.poi</groupId>
                <artifactId>poi-ooxml-schemas</artifactId>
                <version>4.1.0</version>
            </dependency>
    
            <!-- https://mvnrepository.com/artifact/dom4j/dom4j -->
            <dependency>
                <groupId>dom4j</groupId>
                <artifactId>dom4j</artifactId>
                <version>1.6.1</version>
            </dependency>
    
            <!-- https://mvnrepository.com/artifact/org.apache.xmlbeans/xmlbeans -->
            <dependency>
                <groupId>org.apache.xmlbeans</groupId>
                <artifactId>xmlbeans</artifactId>
                <version>3.1.0</version>
            </dependency>
    
    • controller层:
    package com.fjq.deriveexcelmillion;
    
    import org.apache.poi.ss.usermodel.Workbook;
    import org.apache.poi.xssf.streaming.SXSSFWorkbook;
    import org.springframework.web.bind.annotation.RequestMapping;
    import org.springframework.web.bind.annotation.ResponseBody;
    import org.springframework.web.bind.annotation.RestController;
    
    import javax.servlet.http.HttpServletResponse;
    import java.io.OutputStream;
    import java.io.UnsupportedEncodingException;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.LinkedList;
    import java.util.List;
    
    /**
     * @author fjq
     * @Description:
     * @create 2019/12/10
     */
    
    @RestController
    public class DeriveExcelController {
    
    
        public static final String[] TITLE      = new String[]{"第1列", "第2列", "第3列", "第4列", "第5列", "第6列", "第7列", "第8列", "第9列", "第10列"};
        public static final String   SHEET_NAME = "sheet1";
    
        @RequestMapping(value = "/exportThread")
        @ResponseBody
        public void exportSXSSFWorkbookByPageThread(HttpServletResponse response) throws Exception {
            //excel文件名
            SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMdd_HHmmss");
            String date = dateFormat.format(new Date());
            String fileName = date + ".xlsx";
            //sheet页中的行数,行数数据;
            Integer num = 1000000;
            List<LinkedList<String>> list = buildContent(num);
            System.out.println("list:{"+list.size()+"}");
            long start = System.currentTimeMillis();
            SXSSFWorkbook wb = ExcelUtil.exportExcel(TITLE, SHEET_NAME, list);
            long millis = System.currentTimeMillis() - start;
            long second = millis / 1000;
            System.out.println("SXSSF Page Thread 导出" + num + "条数据,花费:" + second + "s/ " + millis + "ms");
            writeAndClose(response, fileName, wb);
            wb.dispose();
        }
    
        /**
         * 构建内容
         *
         * @param num
         * @return
         */
        private List<LinkedList<String>> buildContent(int num) {
    
            List<LinkedList<String>> resultList = new ArrayList<>();
            //小于当前的总行数
            for (int i=1; i<=num;i++){   //i的值会在1~4之间变化
                LinkedList<String> linkedList = new LinkedList<>();
                //合同编号
                linkedList.add("1");
                linkedList.add("2");
                linkedList.add("3");
                linkedList.add("4");
                linkedList.add("5");
                linkedList.add("6");
                linkedList.add("7");
                linkedList.add("8");
                linkedList.add("9");
                linkedList.add("10");
                resultList.add(linkedList);
            }
            return resultList;
        }
    
        private void writeAndClose(HttpServletResponse response, String fileName, Workbook wb) {
            try {
                this.setResponseHeader(response, fileName);
                OutputStream os = response.getOutputStream();
                wb.write(os);
                os.flush();
                os.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        public void setResponseHeader(HttpServletResponse response, String fileName) {
            try {
                try {
                    fileName = new String(fileName.getBytes(), "UTF-8");
                } catch (UnsupportedEncodingException e) {
                    e.printStackTrace();
                }
                response.setContentType("application/octet-stream;charset=ISO8859-1");
                response.setHeader("Content-Disposition", "attachment;filename=" + fileName);
                response.addHeader("Pargam", "no-cache");
                response.addHeader("Cache-Control", "no-cache");
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    }
    

    工具类:

    package com.fjq.deriveexcelmillion;
    
    import org.apache.poi.ss.usermodel.CellStyle;
    import org.apache.poi.xssf.streaming.SXSSFSheet;
    import org.apache.poi.xssf.streaming.SXSSFWorkbook;
    
    import java.util.LinkedList;
    import java.util.List;
    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.Executor;
    
    import static java.util.concurrent.Executors.newFixedThreadPool;
    
    /**
     * @author fjq
     * @Description:
     * @create 2019/12/10
     */
    public class ExcelUtil {
        /**
         * @Author: feijq
         * @Description:导出修改
         * @Date: 2019/7/5 11:01
         **/
        public static SXSSFWorkbook exportExcel(String[] title, String sheetName , List<LinkedList<String>> list) {
            SXSSFWorkbook wb = new SXSSFWorkbook();
            int count = 1;
            CountDownLatch downLatch = new CountDownLatch(count);
            Executor executor = newFixedThreadPool(count);
            SXSSFSheet sheet = wb.createSheet(sheetName);
            CellStyle style = wb.createCellStyle();
            style.setWrapText(true);
            executor.execute(new PageTask(downLatch, sheet, title, style, list));
            try {
                downLatch.await();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return wb;
        }
    
    }
    
    

    PageTask

    package com.fjq.deriveexcelmillion;
    
    import org.apache.poi.ss.usermodel.Cell;
    import org.apache.poi.ss.usermodel.CellStyle;
    import org.apache.poi.ss.usermodel.Row;
    import org.apache.poi.ss.usermodel.Sheet;
    
    import java.util.LinkedList;
    import java.util.List;
    import java.util.concurrent.CountDownLatch;
    
    /**
     * @author fjq
     * @Description:
     * @create 2019/12/10
     */
    public class PageTask implements Runnable {
    
        private CountDownLatch           countDownLatch;
        private Sheet                    sheet;
        private String[]                 title;
        private CellStyle                style;
        private List<LinkedList<String>> list;
        public PageTask(CountDownLatch countDownLatch, Sheet sheet, String[] title, CellStyle style, List<LinkedList<String>> list) {
            this.countDownLatch = countDownLatch;
            this.sheet = sheet;
            this.title = title;
            this.style = style;
            this.list = list;
        }
        @Override
        public void run() {
            try {
                Row row = sheet.createRow(0);
                Cell cell = null;
                for (int i = 0; i < title.length; i++) {
                    cell = row.createCell(i);
                    cell.setCellValue(title[i]);
                    cell.setCellStyle(style);
                }
                for (int i = 0; i < list.size(); i++) {
                    LinkedList<String> list1 = this.list.get(i);
                    row = sheet.createRow(i + 1);
                    for (int j = 0; j < title.length; j++) {
                        row.createCell(j).setCellValue(list1.get(j));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (countDownLatch != null) {
                    countDownLatch.countDown();
                }
            }
        }
    }
    
    

    结果: 100万条数据 导出用时12s(没有组织数据的时间)


    相关文章

      网友评论

        本文标题:SpringBoot框架(43):POI高效导出百万级Excel

        本文链接:https://www.haomeiwen.com/subject/jjmtcctx.html