代码整理
大石头 authored at 2023-06-22 16:30:04 大石头 committed at 2023-07-02 08:52:22
7.07 KiB
X
using System.Text;
using System.Text.RegularExpressions;
using NewLife.Collections;

namespace NewLife.IO;

/// <summary>CSV file reader and writer</summary>
/// <remarks>
/// Documentation: https://newlifex.com/core/csv_file
/// Supports reading/writing a whole file as well as incremental, line-by-line access,
/// with the goal of handling very large CSV files.
/// </remarks>
#if NET5_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER
public class CsvFile : IDisposable, IAsyncDisposable
#else
public class CsvFile : IDisposable
#endif
{
    #region Properties
    /// <summary>Text encoding handed to the reader/writer when it is first created. Defaults to UTF-8.</summary>
    public Encoding Encoding { get; set; } = Encoding.UTF8;

    private readonly Stream _stream;
    private readonly Boolean _leaveOpen;

    /// <summary>Field separator. Defaults to a comma.</summary>
    public Char Separator { get; set; } = ',';

    /// <summary>Matches values made of digits only; built once instead of once per field.</summary>
    private static readonly Regex _digitsOnly = new Regex("^\\d+$", RegexOptions.Compiled);
    #endregion

    #region Constructors
    /// <summary>Creates an instance on top of an existing stream. The stream is closed on dispose.</summary>
    /// <param name="stream">Stream to read from or write to</param>
    public CsvFile(Stream stream) : this(stream, false) { }

    /// <summary>Creates an instance on top of an existing stream.</summary>
    /// <param name="stream">Stream to read from or write to</param>
    /// <param name="leaveOpen">True to leave the stream open when this instance is disposed</param>
    public CsvFile(Stream stream, Boolean leaveOpen)
    {
        _stream = stream;
        _leaveOpen = leaveOpen;
    }

    /// <summary>Creates an instance on top of a file.</summary>
    /// <param name="file">File path</param>
    /// <param name="write">
    /// True to open the file for reading and writing, creating it (and its directory) when missing.
    /// The file is opened with <see cref="FileMode.OpenOrCreate"/>, so existing content is not truncated.
    /// False to open an existing file read-only.
    /// </param>
    public CsvFile(String file, Boolean write = false)
    {
        file = file.GetFullPath();
        if (write)
            _stream = new FileStream(file.EnsureDirectory(true), FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
        else
            _stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
    }

    private Boolean _disposed;
    /// <summary>Flushes pending output and releases the underlying resources.</summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Flushes pending output and releases the underlying resources.</summary>
    /// <param name="disposing">Not inspected by this implementation</param>
    protected virtual void Dispose(Boolean disposing)
    {
        if (_disposed) return;
        _disposed = true;

        // The writer must be flushed, otherwise the tail of the data may be lost
        _writer?.Flush();

        if (!_leaveOpen && _stream != null)
        {
            _reader.TryDispose();

            _writer.TryDispose();

            _stream.Close();
        }
    }

#if NET5_0_OR_GREATER || NETSTANDARD2_1_OR_GREATER
    /// <summary>Asynchronously flushes pending output and releases the underlying resources.</summary>
    /// <returns>A task that completes when the resources have been released</returns>
    public virtual async ValueTask DisposeAsync()
    {
        if (_disposed) return;
        _disposed = true;

        // The writer only exists after the first write. Awaiting "_writer?.FlushAsync()" on a
        // read-only instance would await a null task and throw NullReferenceException.
        var writer = _writer;

        // The writer must be flushed, otherwise the tail of the data may be lost
        if (writer != null) await writer.FlushAsync().ConfigureAwait(false);

        if (!_leaveOpen && _stream != null)
        {
            _reader.TryDispose();

            if (writer != null) await writer.DisposeAsync().ConfigureAwait(false);

            await _stream.DisposeAsync().ConfigureAwait(false);
        }

        GC.SuppressFinalize(this);
    }
#endif
    #endregion

    #region Read
    /// <summary>Reads the next line and splits it into fields.</summary>
    /// <remarks>
    /// Exactly one physical line is read per call, so a quoted field that contains a line break
    /// is not reassembled. Unquoted fields are trimmed; the content of quoted fields is kept as is.
    /// </remarks>
    /// <returns>The fields of the line, or null when the end of the stream has been reached</returns>
    public String[] ReadLine()
    {
        EnsureReader();

        var line = _reader.ReadLine();
        if (line == null) return null;

        return ParseLine(line);
    }

    /// <summary>Splits one line of text into fields, honoring double-quoted fields.</summary>
    /// <param name="line">Line content without the line terminator</param>
    /// <returns>The fields of the line; an empty line yields a single empty field</returns>
    private String[] ParseLine(String line)
    {
        var fields = new List<String>();
        var sep = Separator;
        var sb = new StringBuilder();

        var pos = 0;
        while (true)
        {
            // Look past leading whitespace to see whether the field is quoted.
            // The separator itself may be a whitespace character (e.g. tab) and must not be skipped.
            var start = pos;
            while (start < line.Length && line[start] != sep && Char.IsWhiteSpace(line[start])) start++;

            String field;
            Int32 end;
            if (start < line.Length && line[start] == '"')
            {
                sb.Length = 0;

                var p = start + 1;
                while (p < line.Length)
                {
                    var ch = line[p++];
                    if (ch != '"')
                    {
                        sb.Append(ch);
                    }
                    else if (p < line.Length && line[p] == '"')
                    {
                        // Two consecutive quotes are the escape for a single quote
                        sb.Append('"');
                        p++;
                    }
                    else
                    {
                        // Closing quote
                        break;
                    }
                }
                // If the closing quote is missing, the rest of the line has been taken as the field content

                end = line.IndexOf(sep, p);
                if (end < 0) end = line.Length;

                // Be lenient with stray text between the closing quote and the separator: keep it, trimmed
                if (end > p) sb.Append(line.Substring(p, end - p).Trim());

                field = sb.ToString();
            }
            else
            {
                end = line.IndexOf(sep, pos);
                if (end < 0) end = line.Length;

                field = line.Substring(pos, end - pos).Trim();
            }

            fields.Add(field);

            // No separator left: this was the last field
            if (end >= line.Length) break;

            pos = end + 1;
        }

        return fields.ToArray();
    }

    /// <summary>Reads all remaining lines.</summary>
    /// <returns>One array of fields per line</returns>
    public String[][] ReadAll()
    {
        var list = new List<String[]>();

        while (true)
        {
            var line = ReadLine();
            if (line == null) break;

            list.Add(line);
        }

        return list.ToArray();
    }

    private StreamReader _reader;
    /// <summary>Creates the reader on first use, with the encoding configured at that moment.</summary>
    private void EnsureReader()
    {
        _reader ??= new StreamReader(_stream, Encoding);
    }
    #endregion

    #region Write
    /// <summary>Writes every row of the given data.</summary>
    /// <param name="data">Rows to write, each row being a sequence of field values</param>
    public void WriteAll(IEnumerable<IEnumerable<Object>> data)
    {
        foreach (var line in data)
        {
            WriteLine(line);
        }
    }

    /// <summary>Writes one line.</summary>
    /// <param name="line">Field values of the line</param>
    public void WriteLine(IEnumerable<Object> line)
    {
        EnsureWriter();

        var str = BuildLine(line);

        _writer.WriteLine(str);
    }

    /// <summary>Writes one line.</summary>
    /// <param name="values">Field values of the line</param>
    public void WriteLine(params Object[] values) => WriteLine(line: values);

#if !NET40
    /// <summary>Asynchronously writes one line.</summary>
    /// <param name="line">Field values of the line</param>
    public async Task WriteLineAsync(IEnumerable<Object> line)
    {
        EnsureWriter();

        var str = BuildLine(line);

        await _writer.WriteLineAsync(str).ConfigureAwait(false);
    }
#endif

    /// <summary>Builds the text of one line from its field values.</summary>
    /// <param name="line">Field values of the line</param>
    /// <returns>The fields joined by <see cref="Separator"/>, quoted where necessary, without a line terminator</returns>
    /// <exception cref="ArgumentNullException"><paramref name="line"/> is null</exception>
    protected virtual String BuildLine(IEnumerable<Object> line)
    {
        if (line == null) throw new ArgumentNullException(nameof(line));

        var sb = Pool.StringBuilder.Get();

        // Track the position explicitly. Testing sb.Length would drop the separator after
        // leading empty fields and shift every following column.
        var first = true;
        foreach (var item in line)
        {
            if (first)
                first = false;
            else
                sb.Append(Separator);

            var str = item switch
            {
                String str2 => str2,
                DateTime dt => dt.ToFullString(""),
                Boolean b => b ? "1" : "0",
                _ => item + "",
            };

            // Prefix long numbers with a tab so spreadsheet software does not show them in scientific notation.
            // The threshold differs between applications (WPS converts above 9 digits, others above 11),
            // so the smallest one, 9, is used.
            if (str.Length > 9 && _digitsOnly.IsMatch(str))
            {
                str = $"\t{str}";
            }

            if (str.Contains('"'))
            {
                // Quotes inside a field are escaped by doubling them
                sb.Append('"').Append(str.Replace("\"", "\"\"")).Append('"');
            }
            else if (str.Contains(Separator) || str.Contains('\r') || str.Contains('\n'))
            {
                sb.Append('"').Append(str).Append('"');
            }
            else
            {
                sb.Append(str);
            }
        }

        return sb.Put(true);
    }

    private StreamWriter _writer;
    /// <summary>Creates the writer on first use, with the encoding configured at that moment.</summary>
    private void EnsureWriter()
    {
#if NET40
        _writer ??= new StreamWriter(_stream, Encoding);
#else
        _writer ??= new StreamWriter(_stream, Encoding, 1024, _leaveOpen);
#endif
    }
    #endregion
}