前言
当要导出的 DataTable
数据量很大时(比如行数几十万的情况下),NPOI
内存占用非常高,这里研究一下性能更好的excel
导出方式
一、使用 closedXML
库
image.png
0、工具类-生成DataTable
using System;
using System.Collections.Generic;
using System.Data;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ExcelExportTest
{
public class GenerateDataTable
{
public DataTable GetNewTable(int rowCount=600000)
{
Stopwatch sw = new Stopwatch();
sw.Start();
var data = new DataTable("测试表格");
data.Columns.Add("Library", typeof(string));
data.Columns.Add("Name", typeof(string));
data.Columns.Add("Description", typeof(string));
data.Columns.Add("Type", typeof(string));
data.Columns.Add("Definer", typeof(string));
data.Columns.Add("Definer_Description", typeof(string));
data.Columns.Add("Creation_Date", typeof(DateTime));
data.Columns.Add("Days_Since_Creation", typeof(string));
data.Columns.Add("Size", typeof(string));
data.Columns.Add("Last_Used", typeof(DateTime));
data.Columns.Add("Attribute", typeof(string));
data.Columns.Add("Count_Of_Objects_Referenced", typeof(string));
data.Columns.Add("Possibly_Referenced", typeof(string));
for (var i = 0; i < rowCount; i++)
{
data.Rows.Add(
"xxxxxxxxx",
"xxxxxxxxx",
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"*USRSPC",
"xxxxxxxxx",
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
DateTime.Now,
"789",
"16384",
DateTime.Now.AddDays(120),
"GRC",
"0",
"0"
);
}
sw.Stop();
Console.WriteLine($"Generate datable used [{sw.ElapsedMilliseconds}] ms");
return data;
}
}
}
1、一次导入全部数据并保存
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using ClosedXML.Excel;
namespace ExcelExportTest
{
public class ClosedXmlTest
{
public void DoTest_GenerateDirectly()
{
var gdt = new GenerateDataTable();
var table = gdt.GetNewTable();
Thread.Sleep(5000);
Stopwatch sw = new Stopwatch();
sw.Start();
using (var workbook = new XLWorkbook())
{
var filePath = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss")+ ".xlsx");
var worksheet = workbook.Worksheets.Add(table, table.TableName);
sw.Stop();
Console.WriteLine($"Add workbook used {sw.ElapsedMilliseconds} ms");
sw.Restart();
workbook.SaveAs(filePath);
}
sw.Stop();
Console.WriteLine($"Save used {sw.ElapsedMilliseconds} ms");
}
}
}
-
耗时及内存占用
image.png
image.png
内存占用 3000 MB左右,耗时 100 秒左右,生成的文件大小 23 MB
image.png
2、每次导入8000笔数据,保存,再打开文件追加
public void DoTest_GenerateThenAttach()
{
var gdt = new GenerateDataTable();
var table = gdt.GetNewTable(1);
Stopwatch sw = new Stopwatch();
sw.Start();
var filePath = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
int looRowCount = 8000;
using (var workbook = new XLWorkbook())
{
workbook.Worksheets.Add(gdt.GetTableWithNRows(table, looRowCount), table.TableName);
workbook.SaveAs(filePath);
}
int i = 2;
int loop = 300000 / looRowCount;
while (i <= loop)
{
using (var workbook = new XLWorkbook(filePath))
{
IXLWorksheet Worksheet = workbook.Worksheet(table.TableName);
int NumberOfLastRow = Worksheet.LastRowUsed().RowNumber();
IXLCell CellForNewData = Worksheet.Cell(NumberOfLastRow + 1, 1);
CellForNewData.InsertTable(gdt.GetTableWithNRows(table, looRowCount));
if (i == loop)
{
Worksheet.Columns().AdjustToContents();
}
workbook.Save();
Console.WriteLine($"Loop {i} work done...");
i++;
}
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
}
sw.Stop();
Console.WriteLine($"Save all rows used {sw.ElapsedMilliseconds} ms");
}
public DataTable GetTableWithNRows(DataTable dataIn, int rowCount)
{
DataTable data = dataIn.Clone();
for (var i = 0; i < rowCount; i++)
{
data.Rows.Add(
"xxxxxxxxx",
"xxxxxxxxx",
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
"*USRSPC",
"xxxxxxxxx",
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
DateTime.Now,
"789",
"16384",
DateTime.Now.AddDays(120),
"GRC",
"0",
"0"
);
}
return data;
}
-
但是这个运行非常慢,耗时严重,而且越往后读取excel后内存占用还是会很大
image.png
二、使用 OpenXML
库
image.png
1、使用高级API
public void DoTest()
{
var gdt = new GenerateDataTable();
var table = gdt.GetNewTable();
Stopwatch sw = new Stopwatch();
sw.Start();
var filename = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
WriteExcelFile(filename, table);
sw.Stop();
Console.WriteLine($"Save used {sw.ElapsedMilliseconds} ms");
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
}
private static void WriteExcelFile(string filename, DataTable table)
{
using (SpreadsheetDocument document = SpreadsheetDocument.Create(filename, SpreadsheetDocumentType.Workbook))
{
WorkbookPart workbookPart = document.AddWorkbookPart();
workbookPart.Workbook = new Workbook();
WorksheetPart worksheetPart = workbookPart.AddNewPart<WorksheetPart>();
var sheetData = new SheetData();
worksheetPart.Worksheet = new Worksheet(sheetData);
Sheets sheets = workbookPart.Workbook.AppendChild(new Sheets());
Sheet sheet = new Sheet() { Id = workbookPart.GetIdOfPart(worksheetPart), SheetId = 1, Name = table.TableName };
sheets.Append(sheet);
Row headerRow = new Row();
List<String> columns = new List<string>();
foreach (System.Data.DataColumn column in table.Columns)
{
columns.Add(column.ColumnName);
Cell cell = new Cell();
cell.DataType = CellValues.String;
cell.CellValue = new CellValue(column.ColumnName);
headerRow.AppendChild(cell);
}
sheetData.AppendChild(headerRow);
foreach (DataRow dsrow in table.Rows)
{
Row newRow = new Row();
foreach (String col in columns)
{
Cell cell = new Cell();
cell.DataType = CellValues.String;
cell.CellValue = new CellValue(dsrow[col].ToString());
newRow.AppendChild(cell);
}
sheetData.AppendChild(newRow);
}
workbookPart.Workbook.Save();
}
}
-
内存占用峰值情况【1300MB】
image.png -
耗时【22秒】
image.png -
生成文件大小【2.33MB】
image.png
2、使用OpenXmlWriter
参考文章
- Writing Large Excel Files with the Open XML SDK
- Export big amount of data from XLSX - OutOfMemoryException
- How to properly use OpenXmlWriter to write large Excel files
- Working with the shared string table (Open XML SDK)
public void DoTest2()
{
var gdt = new GenerateDataTable();
var table = gdt.GetNewTable();
Stopwatch sw = new Stopwatch();
sw.Start();
var filename = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
LargeExport(filename, table);
sw.Stop();
Console.WriteLine($"Save used {sw.ElapsedMilliseconds} ms");
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
}
public static void LargeExport(string filename, DataTable table)
{
using (SpreadsheetDocument document = SpreadsheetDocument.Create(filename, SpreadsheetDocumentType.Workbook))
{
//this list of attributes will be used when writing a start element
List<OpenXmlAttribute> attributes;
OpenXmlWriter writer;
document.AddWorkbookPart();
WorksheetPart workSheetPart = document.WorkbookPart.AddNewPart<WorksheetPart>();
writer = OpenXmlWriter.Create(workSheetPart);
writer.WriteStartElement(new Worksheet());
writer.WriteStartElement(new SheetData());
// 表头列信息
//create a new list of attributes
attributes = new List<OpenXmlAttribute>();
// add the row index attribute to the list
attributes.Add(new OpenXmlAttribute("r", null, "1"));
//write the row start element with the row index attribute
writer.WriteStartElement(new Row(), attributes);
for (int columnNum = 0; columnNum < table.Columns.Count; ++columnNum)
{
//reset the list of attributes
attributes = new List<OpenXmlAttribute>();
// add data type attribute - in this case inline string (you might want to look at the shared strings table)
attributes.Add(new OpenXmlAttribute("t", null, "str"));
//add the cell reference attribute
attributes.Add(new OpenXmlAttribute("r", "", $"{GetColumnName(columnNum+1)}1"));
//write the cell start element with the type and reference attributes
writer.WriteStartElement(new Cell(), attributes);
//write the cell value
writer.WriteElement(new CellValue(table.Columns[columnNum].ColumnName));
// write the end cell element
writer.WriteEndElement();
}
// write the end row element
writer.WriteEndElement();
for (int rowNum = 1; rowNum <= table.Rows.Count; ++rowNum)
{
int docRowNum = rowNum + 1;
//create a new list of attributes
attributes = new List<OpenXmlAttribute>();
// add the row index attribute to the list
attributes.Add(new OpenXmlAttribute("r", null, docRowNum.ToString()));
//write the row start element with the row index attribute
writer.WriteStartElement(new Row(), attributes);
for (int columnNum = 1; columnNum <= table.Columns.Count; ++columnNum)
{
//reset the list of attributes
attributes = new List<OpenXmlAttribute>();
// add data type attribute - in this case inline string (you might want to look at the shared strings table)
attributes.Add(new OpenXmlAttribute("t", null, "str"));
//add the cell reference attribute
attributes.Add(new OpenXmlAttribute("r", "", $"{GetColumnName(columnNum)}{docRowNum}"));
//write the cell start element with the type and reference attributes
writer.WriteStartElement(new Cell(), attributes);
var cellValue = table.Rows[rowNum - 1][columnNum - 1];
string cellStr = cellValue == null ? "" : (cellValue is DateTime?((DateTime)cellValue).ToString("yyyy-MM-dd HH:mm:ss.fff") : cellValue.ToString());
//write the cell value
writer.WriteElement(new CellValue(cellStr));
// write the end cell element
writer.WriteEndElement();
}
// write the end row element
writer.WriteEndElement();
}
// write the end SheetData element
writer.WriteEndElement();
// write the end Worksheet element
writer.WriteEndElement();
writer.Close();
writer = OpenXmlWriter.Create(document.WorkbookPart);
writer.WriteStartElement(new Workbook());
writer.WriteStartElement(new Sheets());
writer.WriteElement(new Sheet()
{
Name = table.TableName,
SheetId = 1,
Id = document.WorkbookPart.GetIdOfPart(workSheetPart)
});
// End Sheets
writer.WriteEndElement();
// End Workbook
writer.WriteEndElement();
writer.Close();
document.Close();
}
}
//A simple helper to get the column name from the column index. This is not well tested!
private static string GetColumnName(int columnIndex)
{
int dividend = columnIndex;
string columnName = String.Empty;
int modifier;
while (dividend > 0)
{
modifier = (dividend - 1) % 26;
columnName = Convert.ToChar(65 + modifier).ToString() + columnName;
dividend = (int)((dividend - modifier) / 26);
}
return columnName;
}
-
内存占用【不超过 190MB】
image.png -
耗时【17 秒】
image.png -
生成的文件大小【26 MB】
image.png
将数据量增大到100万
内存占用不超过【260 MB】,耗时【30秒】,生成文件大小【48.1MB】
image.png
image.png
三、使用 npoi
public void DoTest()
{
var gdt = new GenerateDataTable();
var table = gdt.GetNewTable();
Stopwatch sw = new Stopwatch();
sw.Start();
var filename = System.IO.Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory), DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss") + ".xlsx");
ExportDataTableToExcel(table, filename);
sw.Stop();
Console.WriteLine($"Save used {sw.ElapsedMilliseconds} ms");
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
}
public static Tuple<bool, string> ExportDataTableToExcel(DataTable dt, string saveTopath)
{
bool result = false;
string message = "";
IWorkbook workbook = null;
FileStream fs = null;
IRow row = null;
ISheet sheet = null;
ICell cell = null;
try
{
if (dt != null && dt.Rows.Count > 0)
{
if (saveTopath.IndexOf(".xlsx", StringComparison.Ordinal) > 0) // 2007版本
workbook = new XSSFWorkbook();
else //if (saveTopath.IndexOf(".xls", StringComparison.Ordinal) > 0) // 2003版本
workbook = new HSSFWorkbook();
sheet = workbook.CreateSheet(dt.TableName);
int rowCount = dt.Rows.Count;//行数
int columnCount = dt.Columns.Count;//列数
//设置列头
row = sheet.CreateRow(0);//excel第一行设为列头
for (int c = 0; c < columnCount; c++)
{
cell = row.CreateCell(c);
cell.SetCellValue(dt.Columns[c].ColumnName);
}
//设置每行每列的单元格,
for (int i = 0; i < rowCount; i++)
{
row = sheet.CreateRow(i + 1);
for (int j = 0; j < columnCount; j++)
{
cell = row.CreateCell(j);//excel第二行开始写入数据
cell.SetCellValue(dt.Rows[i][j].ToString());
}
}
using (fs = File.OpenWrite(saveTopath))
{
workbook.Write(fs);//向打开的这个xls文件中写入数据
result = true;
}
}
else
{
message = "没有解析到数据!";
}
return new Tuple<bool, string>(result, message);
}
catch (Exception ex)
{
if (fs != null)
{
fs.Close();
}
return new Tuple<bool, string>(false, ex.Message);
}
}
-
内存占用【2600MB】
image.png -
耗时【50 秒】
-
生成的文件大小【29 MB】
image.png
四、结论
显然,在只考虑生成文件的情况下,应该选择使用OpenXmlWriter
的方式
网友评论