v10.10.2024.0601 优化Json序列化,支持DateOnly/TimeOnly,支持带时区的时间序列化
石头 编写于 2024-06-01 08:10:50
X
using System;
using NewLife.Data;

namespace NewLife.Algorithms
{
    /// <summary>
    /// LTTB最大三角形三桶降采样算法
    /// </summary>
    /// <remarks>
    /// Largest triangle three buckets
    /// LTTB算法由冰岛大学的Sveinn在2013年提出,其对时序数据的降维拟合效果显著,且适用于海量数据处理,已被欧洲多家商业公司采纳
    /// 算法步骤:
    /// 1) 首先确定桶的大小,并将数据点平分到桶中,注意首尾点各占一个桶确保选中
    /// 2) 选中第一个点
    /// 3) 从第二个桶开始,遍历桶中的点,计算每个点的有效区域,并选取有效区域最大的点作为桶的代表点。三角形的选取为[前一个桶的选中点,当前点,后一个桶的平均点]。
    /// 4) 选中最后一个点
    /// </remarks>
    public class LTTBSampling : ISampling
    {
        /// <summary>
        /// 对齐模式。每个桶X轴对齐方式
        /// </summary>
        public AlignModes AlignMode { get; set; }

        /// <summary>
        /// 插值填充算法
        /// </summary>
        public IInterpolation Interpolation { get; set; } = new LinearInterpolation();

        /// <summary>
        /// 降采样处理
        /// </summary>
        /// <param name="data">原始数据</param>
        /// <param name="threshold">阈值,采样数</param>
        /// <returns></returns>
        public TimePoint[] Down(TimePoint[] data, Int32 threshold)
        {
            if (data == null || data.Length < 2) return data;
            if (threshold < 2 || threshold >= data.Length) return data;

            var buckets = SamplingHelper.SplitByAverage(data.Length, threshold, AlignMode == AlignModes.None);

            // 三角形选择相邻三个桶的ABC点,A是前一个桶选择点,C是后一个桶平均点,当前桶选择B,使得三角形有效面积最大
            TimePoint pointA = default;
            if (AlignMode == AlignModes.None) pointA = data[0];
            var sampled = new TimePoint[buckets.Length];
            for (var i = 0; i < buckets.Length; i++)
            {
                var item = buckets[i];
                // 计算下一个桶的平均点作为C
                TimePoint pointC = default;
                {
                    var start = buckets[i + 1].Start;
                    var end = buckets[i + 1].End;
                    end = end < data.Length ? end : data.Length;

                    var length = end - start;
                    for (; start < end; start++)
                    {
                        pointC.Time += data[start].Time;
                        pointC.Value += data[start].Value;
                    }
                    if (length > 0)
                    {
                        pointC.Time /= length;
                        pointC.Value /= length;
                    }
                    else
                    {
                        pointC = data[end - 1];
                    }
                }

                // 计算每个点的有效区域,并选取有效区域最大的点作为桶的代表点
                TimePoint point = default;
                {
                    var max_area = -1.0;
                    for (var j = item.Start; j < item.End; j++)
                    {
                        // 选择一个点B,计算ABC三角形面积
                        var pointB = data[j];
                        var area = Math.Abs(
                            (pointA.Time - pointC.Time) * (pointB.Value - pointA.Value) -
                            (pointA.Time - pointB.Time) * (pointC.Value - pointA.Value)
                            ) / 2;
                        if (area > max_area)
                        {
                            max_area = area;
                            point = pointB;
                        }
                    }
                }
                pointA = point;

                // 对齐
                switch (AlignMode)
                {
                    case AlignModes.Left:
                        point.Time = data[item.Start].Time;
                        break;
                    case AlignModes.Right:
                        point.Time = data[item.End - 1].Time;
                        break;
                    case AlignModes.Center:
                        point.Time = data[(Int32)Math.Round((item.Start + item.End) / 2.0)].Time;
                        break;
                }

                sampled[i++] = point;
            }

            // 第一个点和最后一个点
            if (AlignMode == AlignModes.None)
            {
                sampled[0] = data[0];
                sampled[threshold - 1] = data[data.Length - 1];
            }

            return sampled;
        }

        /// <summary>
        /// 混合处理,降采样和插值
        /// </summary>
        /// <param name="data">原始数据</param>
        /// <param name="size">桶大小。如60/3600/86400</param>
        /// <param name="offset">偏移量。时间不是对齐零点时使用</param>
        /// <returns></returns>
        public TimePoint[] Process(TimePoint[] data, Int32 size, Int32 offset = 0)
        {
            if (data == null || data.Length < 2) return data;
            if (size <= 1) return data;

            var xs = new Int64[data.Length];
            for (var i = 0; i < data.Length; i++) xs[i] = data[i].Time;

            var buckets = SamplingHelper.SplitByFixedSize(xs, size, offset);

            // 每个桶选择一个点作为代表
            var sampled = new TimePoint[buckets.Length];
            var last = 0;
            for (var i = 0; i < buckets.Length; i++)
            {
                // 断层,插值
                var b = buckets[i];
                if (b.Start < 0)
                {
                    sampled[i].Time = i * size;
                    sampled[i].Value = Interpolation.Process(data, last, b.End, i);
                    continue;
                }

                TimePoint point = default;
                var vs = 0.0;
                for (var j = b.Start; j < b.End; j++)
                {
                    vs += data[j].Value;
                }
                last = b.End - 1;
                point.Value = vs / (b.End - b.Start);

                // 对齐
                switch (AlignMode)
                {
                    case AlignModes.Left:
                    default:
                        point.Time = i * size;
                        break;
                    case AlignModes.Right:
                        point.Time = (i + 1) * size - 1;
                        break;
                    case AlignModes.Center:
                        point.Time = data[(Int32)Math.Round((i + 0.5) * size)].Time;
                        break;
                }

                sampled[i] = point;
            }

            return sampled;
        }
    }
}