美文网首页
.Net 大数据量导出Excel方案

.Net 大数据量导出Excel方案

作者: 俊果果 | 来源:发表于2020-05-30 01:59 被阅读0次

    前言

    当要导出的 DataTable 数据量很大时(比如行数几十万的情况下),NPOI 内存占用非常高,这里研究一下性能更好的excel导出方式

    一、使用 closedXML

    image.png

    0、工具类-生成DataTable

    using System;
    using System.Collections.Generic;
    using System.Data;
    using System.Diagnostics;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace ExcelExportTest
    {
        public class GenerateDataTable
        {
            public DataTable GetNewTable(int rowCount=600000)
            {
                Stopwatch sw = new Stopwatch();
                sw.Start();
                var data = new DataTable("测试表格");
                data.Columns.Add("Library", typeof(string));
                data.Columns.Add("Name", typeof(string));
                data.Columns.Add("Description", typeof(string));
                data.Columns.Add("Type", typeof(string));
                data.Columns.Add("Definer", typeof(string));
                data.Columns.Add("Definer_Description", typeof(string));
                data.Columns.Add("Creation_Date", typeof(DateTime));
                data.Columns.Add("Days_Since_Creation", typeof(string));
                data.Columns.Add("Size", typeof(string));
                data.Columns.Add("Last_Used", typeof(DateTime));
                data.Columns.Add("Attribute", typeof(string));
                data.Columns.Add("Count_Of_Objects_Referenced", typeof(string));
                data.Columns.Add("Possibly_Referenced", typeof(string));
    
                for (var i = 0; i < rowCount; i++)
                {
                    data.Rows.Add(
                        "xxxxxxxxx",
                        "xxxxxxxxx",
                        "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                        "*USRSPC",
                        "xxxxxxxxx",
                        "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                        DateTime.Now,
                        "789",
                        "16384",
                        DateTime.Now.AddDays(120),
                        "GRC",
                        "0",
                        "0"
                    );
                }
                sw.Stop();
                Console.WriteLine($"Generate datable used [{sw.ElapsedMilliseconds}] ms");
                return data;
            }
        }
    }
    

    1、一次导入全部数据并保存

    using System;
    using System.Collections.Generic;
    using System.Diagnostics;
    using System.Linq;
    using System.Text;
    using System.Threading;
    using System.Threading.Tasks;
    using ClosedXML.Excel;
    
    namespace ExcelExportTest
    {
        public class ClosedXmlTest
        {
            public void DoTest_GenerateDirectly()
            {
                var gdt = new GenerateDataTable();
                var table = gdt.GetNewTable();
                Thread.Sleep(5000);
    
                Stopwatch sw = new Stopwatch();
                sw.Start();
                using (var workbook = new XLWorkbook())
                {
                    var filePath = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss")+ ".xlsx");
                    var worksheet = workbook.Worksheets.Add(table, table.TableName);
                    sw.Stop();
                    Console.WriteLine($"Add workbook used {sw.ElapsedMilliseconds} ms");
                    sw.Restart();
                    workbook.SaveAs(filePath);
                }
                sw.Stop();
                Console.WriteLine($"Save used {sw.ElapsedMilliseconds} ms");
            }
        }
    }
    
    • 耗时及内存占用


      image.png
      image.png

      内存占用 3000 MB左右,耗时 100 秒左右,生成的文件大小 23 MB


      image.png

    2、每次导入8000笔数据,保存,再打开文件追加

            public void DoTest_GenerateThenAttach()
            {
                var gdt = new GenerateDataTable();
                var table = gdt.GetNewTable(1);
    
                Stopwatch sw = new Stopwatch();
                sw.Start();
                var filePath = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
                int looRowCount = 8000;
                using (var workbook = new XLWorkbook())
                {
                    workbook.Worksheets.Add(gdt.GetTableWithNRows(table, looRowCount), table.TableName);
                    workbook.SaveAs(filePath);
                }
    
                int i = 2;
                int loop = 300000 / looRowCount;
                while (i <= loop)
                {
                    using (var workbook = new XLWorkbook(filePath))
                    {
                        IXLWorksheet Worksheet = workbook.Worksheet(table.TableName);
                        int NumberOfLastRow = Worksheet.LastRowUsed().RowNumber();
                        IXLCell CellForNewData = Worksheet.Cell(NumberOfLastRow + 1, 1);
                        CellForNewData.InsertTable(gdt.GetTableWithNRows(table, looRowCount));
                        if (i == loop)
                        {
                            Worksheet.Columns().AdjustToContents();
                        }
                        workbook.Save();
                        Console.WriteLine($"Loop {i} work done...");
                        i++;
                    }
    
                    GC.Collect();
                    GC.WaitForPendingFinalizers();
                    GC.Collect();
                }
    
                sw.Stop();
                Console.WriteLine($"Save all rows used {sw.ElapsedMilliseconds} ms");
            }
    
            public DataTable GetTableWithNRows(DataTable dataIn, int rowCount)
            {
                DataTable data = dataIn.Clone();
                for (var i = 0; i < rowCount; i++)
                {
                    data.Rows.Add(
                        "xxxxxxxxx",
                        "xxxxxxxxx",
                        "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                        "*USRSPC",
                        "xxxxxxxxx",
                        "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                        DateTime.Now,
                        "789",
                        "16384",
                        DateTime.Now.AddDays(120),
                        "GRC",
                        "0",
                        "0"
                    );
                }
    
                return data;
            }
    
    • 但是这个运行非常慢,耗时严重,而且越往后读取excel后内存占用还是会很大


      image.png

    二、使用 OpenXML

    image.png

    1、使用高级API

            public void DoTest()
            {
                var gdt = new GenerateDataTable();
                var table = gdt.GetNewTable();
    
                Stopwatch sw = new Stopwatch();
                sw.Start();
                var filename = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
                WriteExcelFile(filename, table);
                sw.Stop();
                Console.WriteLine($"Save used {sw.ElapsedMilliseconds} ms");
                GC.Collect();
                GC.WaitForPendingFinalizers();
                GC.Collect();
            }
    
            private static void WriteExcelFile(string filename, DataTable table)
            {
                using (SpreadsheetDocument document = SpreadsheetDocument.Create(filename, SpreadsheetDocumentType.Workbook))
                {
                    WorkbookPart workbookPart = document.AddWorkbookPart();
                    workbookPart.Workbook = new Workbook();
    
                    WorksheetPart worksheetPart = workbookPart.AddNewPart<WorksheetPart>();
                    var sheetData = new SheetData();
                    worksheetPart.Worksheet = new Worksheet(sheetData);
    
                    Sheets sheets = workbookPart.Workbook.AppendChild(new Sheets());
                    Sheet sheet = new Sheet() { Id = workbookPart.GetIdOfPart(worksheetPart), SheetId = 1, Name = table.TableName };
    
                    sheets.Append(sheet);
    
                    Row headerRow = new Row();
    
                    List<String> columns = new List<string>();
                    foreach (System.Data.DataColumn column in table.Columns)
                    {
                        columns.Add(column.ColumnName);
    
                        Cell cell = new Cell();
                        cell.DataType = CellValues.String;
                        cell.CellValue = new CellValue(column.ColumnName);
                        headerRow.AppendChild(cell);
                    }
    
                    sheetData.AppendChild(headerRow);
    
                    foreach (DataRow dsrow in table.Rows)
                    {
                        Row newRow = new Row();
                        foreach (String col in columns)
                        {
                            Cell cell = new Cell();
                            cell.DataType = CellValues.String;
                            cell.CellValue = new CellValue(dsrow[col].ToString());
                            newRow.AppendChild(cell);
                        }
    
                        sheetData.AppendChild(newRow);
                    }
    
                    workbookPart.Workbook.Save();
                }
            }
    
    • 内存占用峰值情况【1300MB】


      image.png
    • 耗时【22秒】


      image.png
    • 生成文件大小【2.33MB】


      image.png

    2、使用OpenXmlWriter

    参考文章

            public void DoTest2()
            {
                var gdt = new GenerateDataTable();
                var table = gdt.GetNewTable();
    
                Stopwatch sw = new Stopwatch();
                sw.Start();
                var filename = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
                LargeExport(filename, table);
                sw.Stop();
                Console.WriteLine($"Save used {sw.ElapsedMilliseconds} ms");
                GC.Collect();
                GC.WaitForPendingFinalizers();
                GC.Collect();
            }
            public static void LargeExport(string filename, DataTable table)
            {
                using (SpreadsheetDocument document = SpreadsheetDocument.Create(filename, SpreadsheetDocumentType.Workbook))
                {
                    //this list of attributes will be used when writing a start element
                    List<OpenXmlAttribute> attributes;
                    OpenXmlWriter writer;
    
                    document.AddWorkbookPart();
                    WorksheetPart workSheetPart = document.WorkbookPart.AddNewPart<WorksheetPart>();
    
                    writer = OpenXmlWriter.Create(workSheetPart);
                    writer.WriteStartElement(new Worksheet());
                    writer.WriteStartElement(new SheetData());
    
                    // 表头列信息
                    //create a new list of attributes
                    attributes = new List<OpenXmlAttribute>();
                    // add the row index attribute to the list
                    attributes.Add(new OpenXmlAttribute("r", null, "1"));
                    //write the row start element with the row index attribute
                    writer.WriteStartElement(new Row(), attributes);
                    for (int columnNum = 0; columnNum < table.Columns.Count; ++columnNum)
                    {
                        //reset the list of attributes
                        attributes = new List<OpenXmlAttribute>();
                        // add data type attribute - in this case inline string (you might want to look at the shared strings table)
                        attributes.Add(new OpenXmlAttribute("t", null, "str"));
                        //add the cell reference attribute
                        attributes.Add(new OpenXmlAttribute("r", "", $"{GetColumnName(columnNum+1)}1"));
    
                        //write the cell start element with the type and reference attributes
                        writer.WriteStartElement(new Cell(), attributes);
                        //write the cell value
                        writer.WriteElement(new CellValue(table.Columns[columnNum].ColumnName));
    
                        // write the end cell element
                        writer.WriteEndElement();
                    }
                    // write the end row element
                    writer.WriteEndElement();
    
                    for (int rowNum = 1; rowNum <= table.Rows.Count; ++rowNum)
                    {
                        int docRowNum = rowNum + 1;
                        //create a new list of attributes
                        attributes = new List<OpenXmlAttribute>();
                        // add the row index attribute to the list
                        attributes.Add(new OpenXmlAttribute("r", null, docRowNum.ToString()));
    
                        //write the row start element with the row index attribute
                        writer.WriteStartElement(new Row(), attributes);
    
                        for (int columnNum = 1; columnNum <= table.Columns.Count; ++columnNum)
                        {
                            //reset the list of attributes
                            attributes = new List<OpenXmlAttribute>();
                            // add data type attribute - in this case inline string (you might want to look at the shared strings table)
                            attributes.Add(new OpenXmlAttribute("t", null, "str"));
                            //add the cell reference attribute
                            attributes.Add(new OpenXmlAttribute("r", "", $"{GetColumnName(columnNum)}{docRowNum}"));
    
                            //write the cell start element with the type and reference attributes
                            writer.WriteStartElement(new Cell(), attributes);
                            var cellValue = table.Rows[rowNum - 1][columnNum - 1];
                            string cellStr = cellValue == null ? "" : (cellValue is DateTime?((DateTime)cellValue).ToString("yyyy-MM-dd HH:mm:ss.fff") : cellValue.ToString());
                            //write the cell value
                            writer.WriteElement(new CellValue(cellStr));
    
                            // write the end cell element
                            writer.WriteEndElement();
                        }
    
                        // write the end row element
                        writer.WriteEndElement();
                    }
    
                    // write the end SheetData element
                    writer.WriteEndElement();
                    // write the end Worksheet element
                    writer.WriteEndElement();
                    writer.Close();
    
                    writer = OpenXmlWriter.Create(document.WorkbookPart);
                    writer.WriteStartElement(new Workbook());
                    writer.WriteStartElement(new Sheets());
    
                    writer.WriteElement(new Sheet()
                    {
                        Name = table.TableName,
                        SheetId = 1,
                        Id = document.WorkbookPart.GetIdOfPart(workSheetPart)
                    });
    
                    // End Sheets
                    writer.WriteEndElement();
                    // End Workbook
                    writer.WriteEndElement();
    
                    writer.Close();
    
                    document.Close();
                }
            }
    
            //A simple helper to get the column name from the column index. This is not well tested!
            private static string GetColumnName(int columnIndex)
            {
                int dividend = columnIndex;
                string columnName = String.Empty;
                int modifier;
    
                while (dividend > 0)
                {
                    modifier = (dividend - 1) % 26;
                    columnName = Convert.ToChar(65 + modifier).ToString() + columnName;
                    dividend = (int)((dividend - modifier) / 26);
                }
    
                return columnName;
            }
    
    • 内存占用【不超过 190MB】


      image.png
    • 耗时【17 秒】


      image.png
    • 生成的文件大小【26 MB】


      image.png

    将数据量增大到100万

    内存占用不超过【260 MB】,耗时【30秒】,生成文件大小【48.1MB】


    image.png
    image.png

    三、使用 npoi

            public void DoTest()
            {
                var gdt = new GenerateDataTable();
                var table = gdt.GetNewTable();
    
                Stopwatch sw = new Stopwatch();
                sw.Start();
                var filename = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
                ExportDataTableToExcel(table, filename);
                sw.Stop();
                Console.WriteLine($"Save used {sw.ElapsedMilliseconds} ms");
                GC.Collect();
                GC.WaitForPendingFinalizers();
                GC.Collect();
            }
    
            public static Tuple<bool, string> ExportDataTableToExcel(DataTable dt, string saveTopath)
            {
                bool result = false;
                string message = "";
                IWorkbook workbook = null;
                FileStream fs = null;
                IRow row = null;
                ISheet sheet = null;
                ICell cell = null;
                try
                {
                    if (dt != null && dt.Rows.Count > 0)
                    {
                        if (saveTopath.IndexOf(".xlsx", StringComparison.Ordinal) > 0) // 2007版本
                            workbook = new XSSFWorkbook();
                        else //if (saveTopath.IndexOf(".xls", StringComparison.Ordinal) > 0) // 2003版本
                            workbook = new HSSFWorkbook();
                        sheet = workbook.CreateSheet(dt.TableName);
                        int rowCount = dt.Rows.Count;//行数  
                        int columnCount = dt.Columns.Count;//列数  
    
                        //设置列头  
                        row = sheet.CreateRow(0);//excel第一行设为列头  
                        for (int c = 0; c < columnCount; c++)
                        {
                            cell = row.CreateCell(c);
                            cell.SetCellValue(dt.Columns[c].ColumnName);
                        }
    
                        //设置每行每列的单元格,  
                        for (int i = 0; i < rowCount; i++)
                        {
                            row = sheet.CreateRow(i + 1);
                            for (int j = 0; j < columnCount; j++)
                            {
                                cell = row.CreateCell(j);//excel第二行开始写入数据  
                                cell.SetCellValue(dt.Rows[i][j].ToString());
                            }
                        }
                        using (fs = File.OpenWrite(saveTopath))
                        {
                            workbook.Write(fs);//向打开的这个xls文件中写入数据  
                            result = true;
                        }
                    }
                    else
                    {
                        message = "没有解析到数据!";
                    }
                    return new Tuple<bool, string>(result, message);
                }
                catch (Exception ex)
                {
                    if (fs != null)
                    {
                        fs.Close();
                    }
                    return new Tuple<bool, string>(false, ex.Message);
                }
            }
    
    • 内存占用【2600MB】


      image.png
    • 耗时【50 秒】

    • 生成的文件大小【29 MB】


      image.png

    四、结论

    显然,在只考虑生成文件的情况下,应该选择使用OpenXmlWriter的方式

    代码

    GitHub Page-LargeDataExportExcel

    相关文章

      网友评论

          本文标题:.Net 大数据量导出Excel方案

          本文链接:https://www.haomeiwen.com/subject/ispqzhtx.html