using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Text;
using System.Xml.Linq;
namespace NewLife.IO
{
/// <summary>轻量级Excel读取器,仅用于导入数据</summary>
/// <remarks>
/// 文档 https://www.yuque.com/smartstone/nx/excel_reader
/// 仅支持xlsx格式,本质上是压缩包,内部xml。
/// 可根据xml格式扩展读取自己想要的内容。
/// </remarks>
public class ExcelReader : DisposeBase
{
#region 属性
/// <summary>文件名</summary>
public String FileName { get; }
/// <summary>工作表</summary>
public ICollection<String> Sheets => _entries.Keys;
private ZipArchive _zip;
private String[] _sharedStrings;
private String[] _styles;
private IDictionary<String, ZipArchiveEntry> _entries;
#endregion
#region 构造
/// <summary>实例化读取器</summary>
/// <param name="fileName"></param>
public ExcelReader(String fileName)
{
if (fileName.IsNullOrEmpty()) throw new ArgumentNullException(nameof(fileName));
FileName = fileName;
//_zip = ZipFile.OpenRead(fileName.GetFullPath());
// 共享访问,避免文件被其它进程打开时再次访问抛出异常
var fs = new FileStream(fileName.GetFullPath(), FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
_zip = new ZipArchive(fs, ZipArchiveMode.Read, true);
Parse();
}
/// <summary>实例化读取器</summary>
/// <param name="stream"></param>
/// <param name="encoding"></param>
public ExcelReader(Stream stream, Encoding encoding)
{
if (stream == null) throw new ArgumentNullException(nameof(stream));
if (stream is FileStream fs) FileName = fs.Name;
_zip = new ZipArchive(stream, ZipArchiveMode.Read, true, encoding);
Parse();
}
/// <summary>销毁</summary>
/// <param name="disposing"></param>
protected override void Dispose(Boolean disposing)
{
base.Dispose(disposing);
_entries?.Clear();
_zip.TryDispose();
}
#endregion
#region 方法
private void Parse()
{
// 读取共享字符串
{
var entry = _zip.GetEntry("xl/sharedStrings.xml");
if (entry != null) _sharedStrings = ReadStrings(entry.Open());
}
// 读取样式
{
var entry = _zip.GetEntry("xl/styles.xml");
if (entry != null) _styles = ReadStyles(entry.Open());
}
// 读取sheet
{
_entries = ReadSheets(_zip);
}
}
private static DateTime _1900 = new(1900, 1, 1);
/// <summary>逐行读取数据,第一行很可能是表头</summary>
/// <param name="sheet">工作表名。一般是sheet1/sheet2/sheet3,默认空,使用第一个数据表</param>
/// <returns></returns>
public IEnumerable<Object[]> ReadRows(String sheet = null)
{
if (sheet.IsNullOrEmpty()) sheet = Sheets.FirstOrDefault();
if (!_entries.TryGetValue(sheet, out var entry)) throw new ArgumentOutOfRangeException(nameof(sheet), "找不到工作表");
var doc = XDocument.Load(entry.Open());
var data = doc.Root.Elements().FirstOrDefault(e => e.Name.LocalName.EqualIgnoreCase("sheetData"));
foreach (var row in data.Elements())
{
var vs = new List<String>();
var c = 'A';
foreach (var col in row.Elements())
{
// 值
var val = col.Value;
// 某些列没有数据,被跳过。r=CellReference
var r = col.Attribute("r");
if (r != null)
{
var c2 = r.Value[0];
while (c2 != c) { vs.Add(null); c++; }
}
// t=DataType, s=SharedString, b=Boolean, n=Number, d=Date
var t = col.Attribute("t");
if (t != null && t.Value == "s")
val = _sharedStrings[val.ToInt()];
else if (_styles != null)
{
// 特殊支持时间日期,s=StyleIndex
var s = col.Attribute("s");
if (s != null)
{
var si = s.Value.ToInt();
if (si < _styles.Length && _styles[si] != null && _styles[si].StartsWith("yy"))
{
if (val.Contains('.'))
{
var ss = val.Split('.');
var dt = _1900.AddDays(ss[0].ToInt() - 2);
dt = dt.AddSeconds(ss[1].ToLong() / 115740);
val = dt.ToFullString();
}
else
{
val = _1900.AddDays(val.ToInt() - 2).ToString("yyyy-MM-dd");
}
}
}
}
vs.Add(val);
c++;
}
yield return vs.ToArray();
}
}
private String[] ReadStrings(Stream ms)
{
var doc = XDocument.Load(ms);
var list = new List<String>();
foreach (var item in doc.Root.Elements())
{
list.Add(item.Value);
}
return list.ToArray();
}
private String[] ReadStyles(Stream ms)
{
var doc = XDocument.Load(ms);
var fmts = new Dictionary<Int32, String>();
var numFmts = doc.Root.Elements().FirstOrDefault(e => e.Name.LocalName == "numFmts");
if (numFmts != null)
{
foreach (var item in numFmts.Elements())
{
var id = item.Attribute("numFmtId").Value.ToInt();
var code = item.Attribute("formatCode").Value;
fmts.Add(id, code);
}
}
var list = new List<String>();
var xfs = doc.Root.Elements().FirstOrDefault(e => e.Name.LocalName == "cellXfs");
foreach (var item in xfs.Elements())
{
var fid = item.Attribute("numFmtId").Value.ToInt();
if (fmts.TryGetValue(fid, out var code))
list.Add(code);
else
list.Add(null);
}
return list.ToArray();
}
private IDictionary<String, ZipArchiveEntry> ReadSheets(ZipArchive zip)
{
var dic = new Dictionary<String, String>();
var entry = _zip.GetEntry("xl/workbook.xml");
if (entry != null)
{
var doc = XDocument.Load(entry.Open());
var list = new List<String>();
var sheets = doc.Root.Elements().FirstOrDefault(e => e.Name.LocalName == "sheets");
if (sheets != null)
{
foreach (var item in sheets.Elements())
{
var id = item.Attribute("sheetId").Value;
var name = item.Attribute("name").Value;
dic[id] = name;
}
}
}
//_entries = _zip.Entries.Where(e =>
// e.FullName.StartsWithIgnoreCase("xl/worksheets/") &&
// e.Name.EndsWithIgnoreCase(".xml"))
// .ToDictionary(e => e.Name.TrimEnd(".xml"), e => e);
var dic2 = new Dictionary<String, ZipArchiveEntry>();
foreach (var item in zip.Entries)
{
if (item.FullName.StartsWithIgnoreCase("xl/worksheets/") && item.Name.EndsWithIgnoreCase(".xml"))
{
var name = item.Name.TrimEnd(".xml");
if (dic.TryGetValue(name.TrimStart("sheet"), out var str)) name = str;
dic2[name] = item;
}
}
return dic2;
}
#endregion
}
}
|