当前的工作里,需要实现xls格式到xlsx格式的转换,因为个人常规使用c#编码,所以使用的库就是NPOI了,本来只想当个代码搬运工,就想着在网上找点代码copy一下,但是找了半天确实没有找到合适的,就自己来处理了!
先讲一些基本概念:
- IWorkbook:在npoi中代表一个excel文件。
- ISheet:代表excel中的一个sheet页面。
- IRow:代表excel中sheet页面里的一行。
- ICell:代表excel中的一个单元格。
NPOI现在很多地方都用这样的接口形式来进行封装了,所以在不需要转换格式的情况下,直接使用这样的类型即可,但是在格式转换地方就会存在一些坑,这个坑就在于如果直接调用方法和传参,会告知你类型不对!说白了,就是有些地方不能用xls读取的格式信息赋值给xlsx格式的处理方法。
因为当前是需要将xls转为xlsx格式,所以就针对xls格式的页面类进行了方法扩展,代码如下:
主代码
public static class HSheetExt
{
/// <summary>
/// 将xls中的sheet转为xlsx格式的sheet(保留处理单元格格式以及字体格式)
/// </summary>
/// <param name="hSSFSheet"></param>
/// <param name="dWb"></param>
/// <returns></returns>
public static XSSFSheet ConvertToXSheet(this HSSFSheet hSSFSheet, XSSFWorkbook dWb)
{
XSSFSheet xSSFSheet = dWb.CreateSheet() as XSSFSheet;
var rowCount = hSSFSheet.LastRowNum;//行数
#region 预定义样式和字体信息
List<XSSFCellStyle> xSSFCellStyles = new List<XSSFCellStyle>();
List<XSSFFont> xSSFFonts = new List<XSSFFont>();
#endregion
//列宽度处理
int maxColumnWidth = 256 * 255;
var xCommnetsDic = ConvertHCommentToXComment(xSSFSheet, hSSFSheet, xSSFFonts);
for (var i = 0; i <= rowCount; i++)
{
var hRow = hSSFSheet.GetRow(i);
if (hRow == null)
{
continue;
}
var xRow = xSSFSheet.CreateRow(i);
xRow.Height = hRow.Height;
var curHRowColumnCount = hRow.LastCellNum;//列数
for(var j = 0;j <= curHRowColumnCount; j++)
{
var orgWidth = hSSFSheet.GetColumnWidth(i);
if (orgWidth > maxColumnWidth)
{
xSSFSheet.SetColumnWidth(i, maxColumnWidth);
}
else
{
xSSFSheet.SetColumnWidth(i, orgWidth);
}
XSSFCell xCell = xRow.CreateCell(j) as XSSFCell;
HSSFCell hCell = hRow.GetCell(j) as HSSFCell;
ConvertHCellToXCell(hCell, xCell, xSSFCellStyles, xSSFFonts, xCommnetsDic, true);
}
}
//最后处理合并单元格的相关情况
int sheetMergerCount = hSSFSheet.NumMergedRegions;
for (int k = 0; k < sheetMergerCount; k++)
xSSFSheet.AddMergedRegion(hSSFSheet.GetMergedRegion(k));
return xSSFSheet;
}
}
上面这段代码有一些内容需要说明,为啥要专门描述是给HSSFSheet类型进行扩展?因为xls格式的sheet页面对应的NPOI类型就是HSSFSheet,如果你扩展ISheet进行处理,如果读取的是xlsx格式文件内容,将会报错!所以在这里会明确指定到底是xls格式还是xlsx格式!
在NPOI库中,HSSF开头的为xls格式文件中的各种处理类型,XSSF开头的为xlsx格式文件中的各种处理类型。
中间为啥会设置maxColumnWidth = 256*255,因为有些列宽确实太长了,然后加载到NPOI处理转换时就会报错,所以在转换时,需要设置最大值!
处理批注信息代码
/// <summary>
/// 处理批注信息转换
/// </summary>
/// <param name="xSSFSheet"></param>
/// <param name="hSSFSheet"></param>
/// <param name="xSSFFonts"></param>
/// <returns></returns>
private static Dictionary<CellAddress,XSSFComment> ConvertHCommentToXComment(XSSFSheet xSSFSheet,HSSFSheet hSSFSheet, List<XSSFFont> xSSFFonts)
{
var hCommnetsDic = hSSFSheet.GetCellComments();
var drawing = xSSFSheet.CreateDrawingPatriarch();
Dictionary<CellAddress,XSSFComment> dic = new Dictionary<CellAddress, XSSFComment> ();
foreach (var item in hCommnetsDic)
{
XSSFClientAnchor anchor = new XSSFClientAnchor();
anchor.AnchorType = item.Value.ClientAnchor.AnchorType;
anchor.Col1 = item.Value.ClientAnchor.Col1;
anchor.Col2 = item.Value.ClientAnchor.Col2;
anchor.Dx1 = item.Value.ClientAnchor.Dx1;
anchor.Dx2 = item.Value.ClientAnchor.Dx2;
anchor.Dy1 = item.Value.ClientAnchor.Dy1;
anchor.Dy2 = item.Value.ClientAnchor.Dy2;
anchor.Row1 = item.Value.ClientAnchor.Row1;
anchor.Row2 = item.Value.ClientAnchor.Row2;
var comment = drawing.CreateCellComment(anchor) as XSSFComment;
comment.Visible = item.Value.Visible;
HSSFRichTextString richText = (HSSFRichTextString)item.Value.String;
int formattingRuns = richText.NumFormattingRuns;
comment.String = new XSSFRichTextString(richText.String);
for (int i = 0; i < formattingRuns; i++)
{
int startIdx = richText.GetIndexOfFormattingRun(i);
short fontIndex = richText.GetFontOfFormattingRun(i);
HSSFFont hSSFFont = hSSFSheet.Workbook.GetFontAt(fontIndex) as HSSFFont;
var xFont = FindXFont(hSSFFont, xSSFFonts, xSSFSheet);
comment.String.ApplyFont(startIdx, richText.Length, xFont);
}
dic.Add(item.Key,comment);
}
return dic;
}
因为批注中可能涉及多种字体样式信息,故在传入参数的时候,也将字体格式信息进行传入了!
转换单元格信息
/// <summary>
/// 转换单元格信息
/// </summary>
/// <param name="hSSFCell"></param>
/// <param name="xSSFCell"></param>
/// <param name="xSSFCellStyles"></param>
/// <param name="xSSFFonts"></param>
/// <param name="isConvertStyle"></param>
private static void ConvertHCellToXCell(HSSFCell hSSFCell,XSSFCell xSSFCell,List<XSSFCellStyle> xSSFCellStyles, List<XSSFFont> xSSFFonts, Dictionary<CellAddress,XSSFComment> xCommnetsDic, bool isConvertStyle = true)
{
if (hSSFCell != null)
{
var hCellType = hSSFCell.CellType;
if (hSSFCell.CellComment != null)
{
var xCellComment = xCommnetsDic.Where(w => w.Key.Column == hSSFCell.CellComment.Address.Column && w.Key.Row == hSSFCell.CellComment.Address.Row).FirstOrDefault().Value;
xSSFCell.CellComment = xCellComment;
}
switch (hCellType)
{
case NPOI.SS.UserModel.CellType.Numeric:
xSSFCell.SetCellValue(hSSFCell.NumericCellValue);
break;
case NPOI.SS.UserModel.CellType.Unknown:
return;
case NPOI.SS.UserModel.CellType.Boolean:
xSSFCell.SetCellValue(hSSFCell.BooleanCellValue);
break;
case NPOI.SS.UserModel.CellType.Blank:
return;
case NPOI.SS.UserModel.CellType.Formula:
xSSFCell.SetCellValue(hSSFCell.CellFormula);
break;
case NPOI.SS.UserModel.CellType.Error:
xSSFCell.SetCellErrorValue(hSSFCell.ErrorCellValue);
return;
case NPOI.SS.UserModel.CellType.String:
//xSSFCell.SetCellValue(hSSFCell.StringCellValue);
HSSFRichTextString richText = (HSSFRichTextString)hSSFCell.RichStringCellValue;
int formattingRuns = hSSFCell.RichStringCellValue.NumFormattingRuns;
XSSFRichTextString xSSFRichTextString = new XSSFRichTextString(richText.String);
for (int i = 0; i < formattingRuns; i++)
{
int startIdx = richText.GetIndexOfFormattingRun(i);
short fontIndex = richText.GetFontOfFormattingRun(i);
HSSFFont hSSFFont = hSSFCell.Sheet.Workbook.GetFontAt(fontIndex) as HSSFFont;
var xFont = FindXFont(hSSFFont, xSSFFonts, (XSSFSheet)xSSFCell.Sheet);
xSSFRichTextString.ApplyFont(startIdx, richText.Length, xFont);
}
xSSFCell.SetCellValue(xSSFRichTextString);
break;
}
if (isConvertStyle)
{
//获取需要处理的一些需要处理的样式元素信息
HSSFCellStyle hSSFCellStyle = hSSFCell.CellStyle as HSSFCellStyle;
if (hSSFCellStyle != null)
{
var xStyle = FindXStyle(hSSFCellStyle, xSSFCellStyles, (XSSFSheet)xSSFCell.Sheet);
if (xStyle != null)
{
xSSFCell.CellStyle = xStyle;
}
}
}
}
return;
}
大家可以看到,上面的方法中传入了style样式信息,为啥要这样做呢?NPOI中对单元格样式的创建数量是有上限的,所以如果存在我们认为相同格式的样式时,就从已有的样式库中去获取就可以了,这样可以减少创建样式信息,以避免因样式问题报错!上限值我记得应该是4000个,这很容易超限啊,4000个样式定义也就20列200行的数量级。
处理样式定义
/// <summary>
/// 获取并处理单元格背景色信息
/// </summary>
/// <param name="hSSFCellStyle"></param>
/// <param name="xSSFCellStyles"></param>
/// <param name="xSSFSheet"></param>
/// <returns></returns>
private static XSSFCellStyle FindXStyle(HSSFCellStyle hSSFCellStyle, List<XSSFCellStyle> xSSFCellStyles, XSSFSheet xSSFSheet)
{
foreach (var item in xSSFCellStyles)
{
if (item.Alignment != hSSFCellStyle.Alignment)
continue;
else if (item.BorderLeft != hSSFCellStyle.BorderLeft)
continue;
else if (item.BorderRight != hSSFCellStyle.BorderRight)
continue;
else if (item.BorderTop != hSSFCellStyle.BorderTop)
continue;
else if (item.BorderBottom != hSSFCellStyle.BorderBottom)
continue;
else if (item.FillBackgroundColor != hSSFCellStyle.FillBackgroundColor)
continue;
else if (item.VerticalAlignment != hSSFCellStyle.VerticalAlignment)
continue;
else if (item.IsHidden != hSSFCellStyle.IsHidden)
continue;
else if (item.WrapText != hSSFCellStyle.WrapText)
continue;
else if (item.FillForegroundColor != hSSFCellStyle.FillForegroundColor)
continue;
else if (item.FillPattern != hSSFCellStyle.FillPattern)
continue;
else
{
return item;
}
}
var xSSFCellStyle = xSSFSheet.Workbook.CreateCellStyle() as XSSFCellStyle;
xSSFCellStyle.Alignment = hSSFCellStyle.Alignment;
xSSFCellStyle.BorderLeft = hSSFCellStyle.BorderLeft;
xSSFCellStyle.BorderRight = hSSFCellStyle.BorderRight;
xSSFCellStyle.BorderTop = hSSFCellStyle.BorderTop;
xSSFCellStyle.BorderBottom = hSSFCellStyle.BorderBottom;
xSSFCellStyle.FillBackgroundColor = hSSFCellStyle.FillBackgroundColor;
xSSFCellStyle.VerticalAlignment = hSSFCellStyle.VerticalAlignment;
xSSFCellStyle.IsHidden = hSSFCellStyle.IsHidden;
xSSFCellStyle.WrapText = hSSFCellStyle.WrapText;
xSSFCellStyle.FillForegroundColor = hSSFCellStyle.FillForegroundColor;
xSSFCellStyle.FillPattern = hSSFCellStyle.FillPattern;
xSSFCellStyles.Add(xSSFCellStyle);
return xSSFCellStyle;
}
处理字体格式信息
/// <summary>
/// 获取并处理文字格式信息
/// </summary>
/// <param name="hSSFFont"></param>
/// <param name="xSSFFonts"></param>
/// <param name="xSSFSheet"></param>
/// <returns></returns>
private static XSSFFont FindXFont(HSSFFont hSSFFont,List<XSSFFont> xSSFFonts, XSSFSheet xSSFSheet)
{
foreach (var item in xSSFFonts)
{
if (item.IsBold != hSSFFont.IsBold)
continue;
else if (item.Charset != hSSFFont.Charset)
continue;
else if (item.Color != hSSFFont.Color)
continue;
else if (item.FontHeight != hSSFFont.FontHeight)
continue;
//else if (item.FontHeightInPoints != hSSFFont.FontHeightInPoints)
// continue;
else if (item.FontName != hSSFFont.FontName)
continue;
else if (item.IsItalic != hSSFFont.IsItalic)
continue;
else if (item.IsStrikeout != hSSFFont.IsStrikeout)
continue;
else if (item.TypeOffset != hSSFFont.TypeOffset)
continue;
else if (item.Underline != hSSFFont.Underline)
continue;
else
{
return item;
}
}
var xSSFFont = xSSFSheet.Workbook.CreateFont() as XSSFFont;
xSSFFont.IsBold = hSSFFont.IsBold;
xSSFFont.Charset = hSSFFont.Charset;
xSSFFont.Color = hSSFFont.Color;
xSSFFont.FontHeight = hSSFFont.FontHeight;
//xSSFFont.FontHeightInPoints = hSSFFont.FontHeightInPoints;
xSSFFont.FontName = hSSFFont.FontName;
xSSFFont.IsItalic = hSSFFont.IsItalic;
xSSFFont.IsStrikeout = hSSFFont.IsStrikeout;
xSSFFont.TypeOffset = hSSFFont.TypeOffset;
xSSFFont.Underline = hSSFFont.Underline;
xSSFFonts.Add(xSSFFont);
return xSSFFont;
}
之前在网上找的代码,转换格式花费的时间实在是不能接受(超过一分钟了),现在工作中要转换的文件大概在50份上下,转换时间不超过10s,还能接受。
前面的调用方法如下:
private void ConvertXLSToXLSX(string srcFilePath, string targetFilePath)
{
using (var fs = new FileStream(srcFilePath, FileMode.Open, FileAccess.Read, FileShare.Read))
{
var oldWorkbook = WorkbookFactory.Create(fs);
if(oldWorkbook is HSSFWorkbook)
{
var numberOfSheets = oldWorkbook.NumberOfSheets;
if(numberOfSheets == 1)
{
//先处理第一个sheet
var oldWorksheet = oldWorkbook.GetSheetAt(0) as HSSFSheet;
using (var fileStream = new FileStream(targetFilePath, FileMode.Create))
{
var newWorkBook = new XSSFWorkbook();
var sheet = oldWorksheet.ConvertToXSheet(newWorkBook);
newWorkBook.Add(sheet);
newWorkBook.Write(fileStream);
newWorkBook.Close();
}
oldWorkbook.Close();
}
else if(numberOfSheets > 1)
{
using (var fileStream = new FileStream(targetFilePath, FileMode.Create))
{
var newWorkBook = new XSSFWorkbook();
for (int i = 0; i < numberOfSheets; i++)
{
var oldWorksheet = oldWorkbook.GetSheetAt(i) as HSSFSheet;
var sheet = oldWorksheet.ConvertToXSheet(newWorkBook);
newWorkBook.Add(sheet);
}
newWorkBook.Write(fileStream);
newWorkBook.Close();
}
oldWorkbook.Close();
}
}
if(oldWorkbook is XSSFWorkbook)
{
if (File.Exists(targetFilePath))
{
File.Delete(targetFilePath);
}
File.Copy(srcFilePath, targetFilePath);
}
}
}
建议大家使用WorkbookFactory.Create(fs);
方式进行读取,并进行类型判断if(oldWorkbook is HSSFWorkbook)
和if(oldWorkbook is XSSFWorkbook)
,因为有些坑啊!表面看着是xls格式,但是读取结果是xlsx格式的,如果这样的情况,直接文件copy过去改后缀就行了!
以上就是相关代码,以供大伙参考!
网友评论