支持根据项目选择报警组
大石头 authored at 2023-11-06 09:27:14
31.92 KiB
Stardust
using System.Text;
using System.Web;
using NewLife;
using NewLife.Caching;
using NewLife.Log;
using NewLife.Threading;
using Stardust.Data;
using Stardust.Data.Monitors;
using Stardust.Data.Nodes;

namespace Stardust.Server.Services;

//public interface IAlarmService
//{
//    /// <summary>添加需要统计的应用,去重</summary>
//    /// <param name="appId"></param>
//    void Add(Int32 appId);
//}

public class AlarmService : IHostedService
{
    /// <summary>Evaluation period in seconds. Defaults to 30; the constructor overwrites it with the configured alarm period.</summary>
    public Int32 Period { get; set; } = 30;

    // Timer that drives DoAlarm; created in StartAsync and disposed in StopAsync.
    private TimerX _timer;
    // Cache holding the "alarm:*" markers used to suppress repeated alarms.
    private readonly ICache _cache;
    // Server settings; WebUrl is read when building the links appended to alarm messages.
    private readonly StarServerSetting _setting;
    // Tracer used to open a span around each alarm evaluation (may be null, hence the ?. calls).
    private readonly ITracer _tracer;

    /// <summary>Creates the alarm service and captures its dependencies.</summary>
    /// <param name="setting">Server settings; supplies the alarm period and the web URL used in alarm links.</param>
    /// <param name="cacheProvider">Provider whose <c>Cache</c> stores the alarm de-duplication markers.</param>
    /// <param name="tracer">Tracer used to open a span around each alarm evaluation.</param>
    public AlarmService(StarServerSetting setting, ICacheProvider cacheProvider, ITracer tracer)
    {
        _tracer = tracer;
        _cache = cacheProvider.Cache;
        _setting = setting;

        // The configured period replaces the property default.
        Period = _setting.AlarmPeriod;
    }

    /// <summary>Starts the periodic alarm evaluation.</summary>
    /// <param name="cancellationToken">Not used by this implementation.</param>
    /// <returns>An already completed task.</returns>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        // First run is due after 5 seconds, then the timer repeats every Period seconds.
        var periodMs = Period * 1000;
        _timer = new TimerX(DoAlarm, null, 5_000, periodMs)
        {
            Async = true
        };

        return Task.CompletedTask;
    }

    /// <summary>Stops the periodic alarm evaluation by disposing the timer.</summary>
    /// <param name="cancellationToken">Not used by this implementation.</param>
    /// <returns>An already completed task.</returns>
    public Task StopAsync(CancellationToken cancellationToken)
    {
        var timer = _timer;
        timer.TryDispose();

        return Task.CompletedTask;
    }

    ///// <summary>添加需要统计的应用,去重</summary>
    ///// <param name="appId"></param>
    //public void Add(Int32 appId)
    //{
    //    if (!_bag.Contains(appId)) _bag.Add(appId);
    //}

    /// <summary>Timer callback: runs one full alarm pass over applications, online nodes and Redis nodes.</summary>
    /// <param name="state">Timer state; not used.</param>
    private void DoAlarm(Object state)
    {
        // Application alarms: error totals, per-item errors and ring-rate (day-over-day) checks.
        foreach (var tracer in AppTracer.FindAllWithCache())
        {
            ProcessAppTracer(tracer);
            ProcessTraceItem(tracer);
            ProcessRingRate(tracer);
        }

        // Node alarms: only nodes that currently have an online record are evaluated.
        foreach (var online in NodeOnline.FindAll())
        {
            var node = online.Node;
            if (node != null) ProcessNode(node);
        }

        // Redis alarms.
        foreach (var redis in RedisNode.FindAllWithCache())
        {
            ProcessRedisNode(redis);
        }

        // Pick up a changed Period for subsequent runs.
        if (Period > 0) _timer.Period = Period * 1000;
    }

    #region 应用性能追踪告警
    /// <summary>Raises an application-level alarm when the current 5-minute statistics exceed the app's error thresholds.</summary>
    /// <param name="app">Application to evaluate; null or disabled applications are skipped.</param>
    private void ProcessAppTracer(AppTracer app)
    {
        if (app == null || !app.Enable) return;

        // Neither an absolute error threshold nor an error-rate threshold is configured.
        if (app.AlarmThreshold <= 0 && app.AlarmErrorRate <= 0) return;

        var webhook = RobotHelper.GetAlarm(app.Project, app.Category, app.AlarmRobot);
        if (webhook.IsNullOrEmpty()) return;

        using var span = _tracer?.NewSpan($"alarm:{nameof(AppTracer)}");

        // Start of the current 5-minute bucket.
        var time = DateTime.Now;
        var minute = time.Date.AddHours(time.Hour).AddMinutes(time.Minute / 5 * 5);
        span?.AppendTag(new { time, minute });

        var st = AppMinuteStat.FindByAppIdAndTime(app.ID, minute);
        if (st == null) return;

        // Either threshold being reached is enough.
        var exceeded = app.AlarmThreshold > 0 && st.Errors >= app.AlarmThreshold ||
            app.AlarmErrorRate > 0 && st.ErrorRate >= app.AlarmErrorRate;
        if (!exceeded) return;

        span?.AppendTag(new { st.Errors, st.ErrorRate });

        // Suppress repeats for 5 minutes unless the error count has more than doubled.
        var key = "alarm:AppTracer:" + app.ID;
        var last = _cache.Get<Int32>(key);
        if (last != 0 && !(st.Errors > last * 2)) return;

        _cache.Set(key, st.Errors, 5 * 60);

        var msg = GetMarkdown(app, st, true);
        RobotHelper.SendAlarm(app.Category ?? app.ProjectName, webhook, "应用告警", msg);
    }

    /// <summary>Builds the markdown body of an application alarm from the app's minute statistics.</summary>
    /// <remarks>
    /// For every trace item with errors in the same time bucket a line is added; for the first occurrence
    /// of each item name the affected nodes and the first line of a sampled error message are added too.
    /// The text is cut at 1600 characters before the "more information" link is appended.
    /// </remarks>
    /// <param name="app">Application the alarm is about; its string form is used in the title.</param>
    /// <param name="st">Application minute statistics that triggered the alarm.</param>
    /// <param name="includeTitle">Whether to emit the markdown title line.</param>
    /// <returns>The markdown text.</returns>
    private String GetMarkdown(AppTracer app, AppMinuteStat st, Boolean includeTitle)
    {
        var sb = new StringBuilder();
        if (includeTitle) sb.AppendLine($"### [{app}]应用告警");
        sb.AppendLine($">**时间:**<font color=\"blue\">{st.StatTime:yyyy-MM-dd HH:mm:ss}</font>");
        sb.AppendLine($">**总数:**<font color=\"red\">{st.Errors}</font>");
        sb.AppendLine($">**错误率:**<font color=\"red\">{st.ErrorRate:p2}</font>");

        var url = _setting.WebUrl;
        var appUrl = "";
        var traceUrl = "";
        if (!url.IsNullOrEmpty())
        {
            var baseUrl = url.EnsureEnd("/");
            appUrl = baseUrl + "Monitors/appMinuteStat?appId=" + st.AppId + "&minError=1";
            traceUrl = baseUrl + "Monitors/traceMinuteStat?appId=" + st.AppId + "&minError=1";
        }

        // Item names whose error detail has already been reported, so the same interface is not detailed twice.
        var names = new HashSet<String>();
        var sts = TraceMinuteStat.FindAllByAppIdAndTime(st.AppId, st.StatTime).OrderByDescending(e => e.Errors).ToList();
        foreach (var item in sts)
        {
            if (item.Errors <= 0) continue;

            // Only emit the "more" link when a web URL is configured; otherwise the link target would be "&itemId=...".
            var more = traceUrl.IsNullOrEmpty() ? "" : $"[更多]({traceUrl}&itemId={item.ItemId})";
            sb.AppendLine($">**错误:**<font color=\"red\">埋点[{item.Name}]报错[{item.Errors:n0}]次</font>{more}");

            if (names.Contains(item.Name)) continue;

            var ds = TraceData.Search(st.AppId, item.ItemId, "minute", item.StatTime, 20);
            if (ds.Count <= 0) continue;

            // Nodes of the application instances that reported errors, keyed by client id.
            var nodes = new Dictionary<String, Node>();
            foreach (var traceData in ds.Where(e => e.Errors > 0).OrderByDescending(e => e.Errors))
            {
                var clientId = traceData.ClientId;
                // A null key would throw on the dictionary lookup; such records cannot be mapped to a node anyway.
                if (clientId.IsNullOrEmpty() || nodes.ContainsKey(clientId)) continue;

                var node = AppOnline.FindByClient(clientId)?.Node;
                if (node != null) nodes[clientId] = node;
            }
            if (nodes.Count > 0)
            {
                // Several clients may run on the same node; list each node name once.
                var nodeNames = nodes.Values.Select(e => e.Name).Distinct().ToArray();
                sb.AppendLine($">**节点:**<font color=\"green\">{nodeNames.Join(",")}</font>");
            }

            var sms = SampleData.FindAllByDataIds(ds.Select(e => e.Id).ToArray(), item.StatTime).Where(e => !e.Error.IsNullOrEmpty()).ToList();
            if (sms.Count <= 0) continue;

            var msg = sms[0].Error?.Trim();
            if (msg.IsNullOrEmpty()) continue;

            // Keep only the first line of the error text; the full detail is behind the link.
            var p = msg.IndexOfAny(new[] { '\r', '\n' });
            if (p > 0) msg = msg[..p];

            sb.AppendLine($">内容:{msg}");

            names.Add(item.Name);
        }

        var str = sb.ToString();
        if (str.Length > 1600) str = str[..1600];

        // Append the link after truncation so it is never cut off.
        if (!appUrl.IsNullOrEmpty())
        {
            str += Environment.NewLine + $"[更多信息]({appUrl})";
        }

        return str;
    }

    /// <summary>Raises per-trace-item alarms from the current 5-minute statistics.</summary>
    /// <remarks>
    /// An item uses its own thresholds; when it has none, the application-wide item thresholds apply.
    /// Both the error-count and the error-rate threshold must be configured and reached for an alarm.
    /// </remarks>
    /// <param name="app">Application whose trace items are evaluated; null or disabled applications are skipped.</param>
    private void ProcessTraceItem(AppTracer app)
    {
        if (app == null || !app.Enable) return;

        // When the application defines item-level defaults every item is a candidate;
        // otherwise only items carrying their own thresholds are.
        var appLevel = app.ItemAlarmThreshold > 0 || app.ItemAlarmErrorRate > 0;

        var tis = app.TraceItems;
        if (!appLevel) tis = tis.Where(e => e.AlarmThreshold > 0 || e.AlarmErrorRate > 0).ToList();
        if (tis.Count <= 0) return;

        // Start of the current 5-minute bucket.
        var time = DateTime.Now;
        var minute = time.Date.AddHours(time.Hour).AddMinutes(time.Minute / 5 * 5);

        using var span = _tracer?.NewSpan($"alarm:{nameof(TraceItem)}", new { appId = app.ID, time, minute });

        var itemIds = tis.Select(e => e.Id).ToArray();
        foreach (var st in TraceMinuteStat.Search(app.ID, minute, itemIds))
        {
            var ti = tis.FirstOrDefault(e => e.Id == st.ItemId);
            if (ti == null) continue;

            // Item thresholds first, application defaults as fallback.
            var max = ti.AlarmThreshold;
            var rate = ti.AlarmErrorRate;
            if (max <= 0 && rate <= 0)
            {
                max = app.ItemAlarmThreshold;
                rate = app.ItemAlarmErrorRate;
            }

            // Both conditions must hold at the same time.
            var exceeded = max > 0 && st.Errors >= max &&
                rate > 0 && st.ErrorRate >= rate;
            if (!exceeded) continue;

            span?.AppendTag(new { st.Errors, st.ErrorRate, rate });

            // Suppress repeats for 5 minutes unless the error count has more than doubled.
            var key = "alarm:TraceMinuteStat:" + ti.Id;
            var last = _cache.Get<Int32>(key);
            if (last != 0 && !(st.Errors > last * 2)) continue;

            _cache.Set(key, st.Errors, 5 * 60);

            // Item-level robot and group take precedence over the application's.
            var webhook = ti.AlarmRobot.IsNullOrEmpty() ? app.AlarmRobot : ti.AlarmRobot;
            var group = ti.AlarmGroup.IsNullOrEmpty() ? app.Category : ti.AlarmGroup;

            var msg = GetMarkdown(app, st, true);
            RobotHelper.SendAlarm(group, webhook, "埋点告警", msg);
        }
    }

    /// <summary>Builds the markdown body of a trace-item alarm from the item's minute statistics.</summary>
    /// <remarks>
    /// Lists the affected nodes and the first line of a sampled error message when trace data is available.
    /// The text is cut at 1600 characters before the "more information" link is appended.
    /// </remarks>
    /// <param name="app">Application the item belongs to; its string form is used in the title.</param>
    /// <param name="st">Trace-item minute statistics that triggered the alarm.</param>
    /// <param name="includeTitle">Whether to emit the markdown title line.</param>
    /// <returns>The markdown text.</returns>
    private String GetMarkdown(AppTracer app, TraceMinuteStat st, Boolean includeTitle)
    {
        var sb = new StringBuilder();
        if (includeTitle) sb.AppendLine($"### [{app}]埋点告警");
        sb.AppendLine($">**时间:**<font color=\"blue\">{st.StatTime:yyyy-MM-dd HH:mm:ss}</font>");
        sb.AppendLine($">**总数:**<font color=\"red\">{st.Errors}</font>");
        sb.AppendLine($">**错误率:**<font color=\"red\">{st.ErrorRate:p2}</font>");

        var url = _setting.WebUrl;
        var traceUrl = "";
        if (!url.IsNullOrEmpty())
        {
            traceUrl = url.EnsureEnd("/") + $"Monitors/traceData?appId={st.AppId}&kind=minute&time={HttpUtility.UrlEncode(st.StatTime.ToFullString())}&itemId={st.ItemId}&minError=1";
        }

        sb.AppendLine($">**错误:**<font color=\"red\">埋点[{st.Name}]报错[{st.Errors:n0}]次</font>");

        var ds = TraceData.Search(st.AppId, st.ItemId, "minute", st.StatTime, 100);
        if (ds.Count > 0)
        {
            // Nodes of the application instances that reported errors, keyed by client id.
            var nodes = new Dictionary<String, Node>();
            foreach (var traceData in ds.Where(e => e.Errors > 0).OrderByDescending(e => e.Errors))
            {
                var clientId = traceData.ClientId;
                // A null key would throw on the dictionary lookup; such records cannot be mapped to a node anyway.
                if (clientId.IsNullOrEmpty() || nodes.ContainsKey(clientId)) continue;

                var node = AppOnline.FindByClient(clientId)?.Node;
                if (node != null) nodes[clientId] = node;
            }
            if (nodes.Count > 0)
            {
                // Several clients may run on the same node; list each node name once.
                var names = nodes.Select(e => e.Value.Name).Distinct().ToArray();
                sb.AppendLine($">**节点:**<font color=\"green\">{names.Join(",")}</font>");
            }

            var sms = SampleData.FindAllByDataIds(ds.Select(e => e.Id).ToArray(), st.StatTime).Where(e => !e.Error.IsNullOrEmpty()).ToList();
            if (sms.Count > 0)
            {
                var msg = sms[0].Error?.Trim();
                if (!msg.IsNullOrEmpty())
                {
                    // Keep only the first line of the error text; the full detail is behind the link.
                    var p = msg.IndexOfAny(new[] { '\r', '\n' });
                    if (p > 0) msg = msg[..p];

                    sb.AppendLine($">内容:{msg}");
                }
            }
        }

        var str = sb.ToString();
        if (str.Length > 1600) str = str[..1600];

        // Append the link after truncation so it is never cut off.
        if (!traceUrl.IsNullOrEmpty())
        {
            str += Environment.NewLine + $"[更多信息]({traceUrl})";
        }

        return str;
    }

    /// <summary>Raises ring-rate alarms: the current hour's call volume compared with the same hour yesterday.</summary>
    /// <remarks>
    /// Only trace items with a MaxRingRate or MinRingRate are checked, and nothing is evaluated during the
    /// first five minutes of an hour. Yesterday's total is scaled by the elapsed part of the current hour
    /// before the ratio is compared with the limits.
    /// </remarks>
    /// <param name="app">Application whose trace items are evaluated; null or disabled applications are skipped.</param>
    private void ProcessRingRate(AppTracer app)
    {
        if (app == null || !app.Enable) return;

        var tis = app.TraceItems.Where(e => e.MaxRingRate > 0 || e.MinRingRate > 0).ToList();
        if (tis.Count <= 0) return;

        // Current hour bucket; skipped while fewer than 5 minutes of the hour have elapsed.
        var time = DateTime.Now;
        var hour = time.Date.AddHours(time.Hour);
        if (time.Minute < 5) return;

        using var span = _tracer?.NewSpan($"alarm:RingRate", new { app.ID, app.Name, app.DisplayName, time, hour });

        foreach (var st in TraceHourStat.Search(app.ID, -1, null, hour, hour.AddHours(1), null, null))
        {
            var ti = tis.FirstOrDefault(e => e.Id == st.ItemId);
            if (ti == null || !(st.RingRate > 0)) continue;

            var max = ti.MaxRingRate;
            var min = ti.MinRingRate;

            // Yesterday's base must be large enough, otherwise a tiny denominator causes false alarms.
            // When yesterday's row is missing, the base is derived from today's total and the stored ring rate.
            var st2 = TraceHourStat.FindAllByStatTimeAndAppIdAndItemId(st.StatTime.AddDays(-1), st.AppId, st.ItemId).FirstOrDefault();
            var yesterday = st2 != null ? st2.Total : (st.Total / st.RingRate);
            if (!(yesterday > 10)) continue;

            // Scale yesterday's full-hour total down to the part of the hour that has elapsed.
            var seconds = time.Minute * 60 + time.Second;
            var rate = st.Total / (yesterday * seconds / 3600);

            // Either limit being crossed is enough.
            var crossed = max > 0 && rate >= max ||
                min > 0 && rate <= min;
            if (!crossed) continue;

            span?.AppendTag(new { seconds, yesterday, rate });

            // Suppress repeats for one hour unless the rate has more than doubled or more than halved.
            var key = "alarm:RingRate:" + ti.Id;
            var last = _cache.Get<Double>(key);
            var notify = last == 0 || rate > last * 2 || rate < last / 2;
            if (!notify) continue;

            _cache.Set(key, rate, 60 * 60);

            // Item-level robot and group take precedence over the application's.
            var webhook = ti.AlarmRobot.IsNullOrEmpty() ? app.AlarmRobot : ti.AlarmRobot;
            var group = ti.AlarmGroup.IsNullOrEmpty() ? app.Category : ti.AlarmGroup;

            var msg = GetMarkdown(app, st, (Int32)yesterday, rate, true);
            RobotHelper.SendAlarm(group, webhook, "埋点告警", msg);
        }
    }

    /// <summary>Builds the markdown body of a ring-rate alarm.</summary>
    /// <param name="app">Application the item belongs to; its string form is used in the title.</param>
    /// <param name="st">Hourly statistics of the trace item.</param>
    /// <param name="yesterday">Call total used as yesterday's base.</param>
    /// <param name="rate">Ring rate after scaling to the elapsed part of the hour; values of 1 or more are titled as high volume, lower values as a drop.</param>
    /// <param name="includeTitle">Whether to emit the markdown title line.</param>
    /// <returns>The markdown text, cut at 1600 characters before the optional link is appended.</returns>
    private String GetMarkdown(AppTracer app, TraceHourStat st, Int32 yesterday, Double rate, Boolean includeTitle)
    {
        var sb = new StringBuilder();
        if (includeTitle)
        {
            var direction = rate >= 1 ? "高调用" : "调用量下滑";
            sb.AppendLine($"### [{app}]环比{direction}告警");
        }
        sb.AppendLine($">**埋点:**<font color=\"blue\">{st.Name}</font>");
        sb.AppendLine($">**时间:**<font color=\"blue\">{st.StatTime:yyyy-MM-dd HH:mm:ss}</font>");
        sb.AppendLine($">**今日:**<font color=\"red\">{st.Total}</font>");
        sb.AppendLine($">**昨日:**<font color=\"red\">{yesterday}</font>");
        sb.AppendLine($">**环比:**<font color=\"red\">{st.RingRate:p2}</font>");
        sb.AppendLine($">**折算环比:**<font color=\"red\">{rate:p2}</font>");

        // Limit the body length; the link is appended afterwards so it is never cut off.
        var text = sb.Length > 1600 ? sb.ToString(0, 1600) : sb.ToString();

        var url = _setting.WebUrl;
        if (url.IsNullOrEmpty()) return text;

        var traceUrl = url.EnsureEnd("/") + $"Monitors/traceHourStat?appId={st.AppId}&itemId={st.ItemId}";
        return text + Environment.NewLine + $"[更多信息]({traceUrl})";
    }
    #endregion

    #region 节点告警
    /// <summary>Evaluates CPU, memory, disk, TCP and process alarms for one node using its latest data record.</summary>
    /// <remarks>
    /// Each alarm kind keeps its own cache marker for 5 minutes so the same condition is not reported
    /// repeatedly; a new alarm is sent within that window only when the value has more than doubled
    /// (or, for processes, when more processes are missing than before).
    /// </remarks>
    /// <param name="node">Node to evaluate; null or disabled nodes are skipped.</param>
    private void ProcessNode(Node node)
    {
        if (node == null || !node.Enable) return;

        var webhook = RobotHelper.GetAlarm(node.Project, node.Category, node.WebHook);
        if (webhook.IsNullOrEmpty()) return;

        // Nothing to do unless at least one alarm is configured. AlarmTcp is part of this check,
        // otherwise a node with only a TCP threshold would never reach the TCP section below.
        if (node.AlarmCpuRate <= 0 && node.AlarmMemoryRate <= 0 && node.AlarmDiskRate <= 0 &&
            node.AlarmTcp <= 0 && node.AlarmProcesses.IsNullOrEmpty()) return;

        using var span = _tracer?.NewSpan($"alarm:{nameof(Node)}");

        // Latest data record of the node.
        var data = NodeData.FindLast(node.ID);
        if (data == null) return;

        // CPU alarm. CpuRate is multiplied by 100 to compare against the percentage threshold.
        if (node.AlarmCpuRate > 0)
        {
            var rate = data.CpuRate * 100;
            if (rate >= node.AlarmCpuRate)
            {
                var error2 = _cache.Get<Double>("alarm:CpuRate:" + node.ID);
                if (error2 == 0 || rate > error2 * 2)
                {
                    _cache.Set("alarm:CpuRate:" + node.ID, rate, 5 * 60);

                    SendAlarm("cpu", node, data, $"[{node.Name}]CPU告警", webhook: webhook);
                }
            }
        }

        // Memory alarm: used percentage = (total - available) / total.
        if (node.AlarmMemoryRate > 0 && node.Memory > 0)
        {
            var rate = (node.Memory - data.AvailableMemory) * 100d / node.Memory;
            if (rate >= node.AlarmMemoryRate)
            {
                var error2 = _cache.Get<Double>("alarm:MemoryRate:" + node.ID);
                if (error2 == 0 || rate > error2 * 2)
                {
                    _cache.Set("alarm:MemoryRate:" + node.ID, rate, 5 * 60);

                    SendAlarm("memory", node, data, $"[{node.Name}]内存告警", webhook: webhook);
                }
            }
        }

        // Disk alarm: used percentage = (total - free) / total.
        if (node.AlarmDiskRate > 0 && node.TotalSize > 0)
        {
            var rate = (node.TotalSize - data.AvailableFreeSpace) * 100d / node.TotalSize;
            if (rate >= node.AlarmDiskRate)
            {
                var error2 = _cache.Get<Double>("alarm:DiskRate:" + node.ID);
                if (error2 == 0 || rate > error2 * 2)
                {
                    _cache.Set("alarm:DiskRate:" + node.ID, rate, 5 * 60);

                    SendAlarm("disk", node, data, $"[{node.Name}]磁盘告警", webhook: webhook);
                }
            }
        }

        // TCP alarm: the largest of connections, TIME_WAIT and CLOSE_WAIT is compared with the threshold.
        if (node.AlarmTcp > 0)
        {
            var tcp = data.TcpConnections;
            if (tcp < data.TcpTimeWait) tcp = data.TcpTimeWait;
            if (tcp < data.TcpCloseWait) tcp = data.TcpCloseWait;
            if (tcp >= node.AlarmTcp)
            {
                var error2 = _cache.Get<Int32>("alarm:Tcp:" + node.ID);
                if (error2 == 0 || tcp > error2 * 2)
                {
                    _cache.Set("alarm:Tcp:" + node.ID, tcp, 5 * 60);

                    SendAlarm("tcp", node, data, $"[{node.Name}]Tcp告警", webhook: webhook);
                }
            }
        }

        // Process alarm: report watched processes that are absent from the online record's process list.
        if (!node.AlarmProcesses.IsNullOrEmpty())
        {
            var olt = NodeOnline.FindByNodeId(node.ID);
            if (olt != null && !olt.Processes.IsNullOrEmpty())
            {
                var alarms = node.AlarmProcesses.Split(",", StringSplitOptions.RemoveEmptyEntries);
                var ps = olt.Processes.Split(",", StringSplitOptions.RemoveEmptyEntries);
                if (alarms.Length > 0 && ps.Length > 0)
                {
                    // Watched processes that are not running.
                    var ps2 = alarms.Where(e => !ps.Contains(e)).ToList();
                    if (ps2.Count > 0)
                    {
                        // Repeat within 5 minutes only when more processes are missing than last time.
                        var error2 = _cache.Get<Int32>("alarm:Process:" + node.ID);
                        if (error2 == 0 || ps2.Count > error2)
                        {
                            _cache.Set("alarm:Process:" + node.ID, ps2.Count, 5 * 60);

                            SendAlarm("process", node, data, $"[{node.Name}]进程守护告警", ps2.Join(), webhook);
                        }
                    }
                }
            }
        }
    }

    /// <summary>Builds the node alarm message and sends it.</summary>
    /// <param name="kind">Alarm kind: "cpu", "memory", "disk", "tcp" or "process".</param>
    /// <param name="node">Node the alarm is about.</param>
    /// <param name="data">Data record the alarm is based on.</param>
    /// <param name="title">Alarm title.</param>
    /// <param name="info">Extra text; used for the list of missing processes.</param>
    /// <param name="webhook">Already resolved webhook; when null or empty the node's own WebHook is used.</param>
    private void SendAlarm(String kind, Node node, NodeData data, String title, String info = null, String webhook = null)
    {
        // Prefer the webhook the caller resolved (which may come from the project or group) over the node's own.
        if (webhook.IsNullOrEmpty()) webhook = node.WebHook;

        var msg = GetMarkdown(kind, node, data, title, info);
        RobotHelper.SendAlarm(node.Category, webhook, title, msg);
    }

    /// <summary>Builds the markdown body of a node alarm.</summary>
    /// <param name="kind">Alarm kind; selects the highlighted detail line ("cpu", "memory", "disk", "tcp", "process").</param>
    /// <param name="node">Node the alarm is about.</param>
    /// <param name="data">Data record the alarm is based on.</param>
    /// <param name="title">Title line; omitted when null or empty.</param>
    /// <param name="msg">Extra text shown for the "process" kind.</param>
    /// <returns>The markdown text, cut at 2000 characters before the optional link is appended.</returns>
    private String GetMarkdown(String kind, Node node, NodeData data, String title, String msg = null)
    {
        var sb = new StringBuilder();
        if (!title.IsNullOrEmpty()) sb.AppendLine($"### {title}");

        // Common header describing the node.
        sb.AppendLine($">**时间:**<font color=\"blue\">{data.CreateTime:yyyy-MM-dd HH:mm:ss}</font>");
        sb.AppendLine($">**节点:**<font color=\"gray\">{node} / {node.IP}</font>");
        sb.AppendLine($">**分类:**<font color=\"gray\">{node.Category}</font>");
        sb.AppendLine($">**系统:**<font color=\"gray\">{node.OS}</font>");
        sb.AppendLine($">**CPU核心:**<font color=\"gray\">{node.Cpu}</font>");
        sb.AppendLine($">**内存容量:**<font color=\"gray\">{node.Memory:n0}M,可用 {data.AvailableMemory:n0}M</font>");
        sb.AppendLine($">**磁盘容量:**<font color=\"gray\">{node.TotalSize:n0}M,可用 {data.AvailableFreeSpace:n0}M</font>");

        // Kind-specific detail line(s).
        if (kind == "cpu")
        {
            sb.AppendLine($">**CPU使用率:**<font color=\"red\">{data.CpuRate:p0} >= {node.AlarmCpuRate / 100d:p0}</font>");
        }
        else if (kind == "memory")
        {
            var rate1 = 1 - (node.Memory == 0 ? 0 : ((Double)data.AvailableMemory / node.Memory));
            sb.AppendLine($">**内存使用率:**<font color=\"red\">{rate1:p0} >= {node.AlarmMemoryRate / 100d:p0}</font>");
        }
        else if (kind == "disk")
        {
            var rate2 = 1 - (node.TotalSize == 0 ? 0 : ((Double)data.AvailableFreeSpace / node.TotalSize));
            sb.AppendLine($">**磁盘使用率:**<font color=\"red\"> {rate2:p0} >= {node.AlarmDiskRate / 100d:p0}</font>");
        }
        else if (kind == "tcp")
        {
            // Each TCP counter that reached the threshold gets its own line.
            if (data.TcpConnections >= node.AlarmTcp)
                sb.AppendLine($">**TCP连接数:**<font color=\"red\">{data.TcpConnections:n0} >= {node.AlarmTcp:n0}</font>");
            if (data.TcpTimeWait >= node.AlarmTcp)
                sb.AppendLine($">**TCP主动关闭:**<font color=\"red\">{data.TcpTimeWait:n0} >= {node.AlarmTcp:n0}</font>");
            if (data.TcpCloseWait >= node.AlarmTcp)
                sb.AppendLine($">**TCP被动关闭:**<font color=\"red\">{data.TcpCloseWait:n0} >= {node.AlarmTcp:n0}</font>");
        }
        else if (kind == "process")
        {
            sb.AppendLine($">**进程已退出:**<font color=\"red\">{msg}</font>");
        }

        // Limit the body length; the link is appended afterwards so it is never cut off.
        var text = sb.Length > 2000 ? sb.ToString(0, 2000) : sb.ToString();

        var url = _setting.WebUrl;
        if (url.IsNullOrEmpty()) return text;

        url = url.EnsureEnd("/") + "Nodes/NodeData?nodeId=" + node.ID;
        return text + Environment.NewLine + $"[更多信息]({url})";
    }
    #endregion

    #region Redis告警
    /// <summary>Runs the data alarms and the queue alarms for one Redis node.</summary>
    /// <param name="node">Redis node to evaluate; null or disabled nodes are skipped.</param>
    private void ProcessRedisNode(RedisNode node)
    {
        var active = node != null && node.Enable;
        if (!active) return;

        ProcessRedisData(node);
        ProcessRedisQueue(node);
    }

    /// <summary>Evaluates memory, connection, speed and traffic alarms for one Redis node using its latest data record.</summary>
    /// <remarks>
    /// Every triggered condition contributes one line to a single combined message. Each condition keeps
    /// its own cache marker for 5 minutes and is repeated within that window only when the value has
    /// more than doubled.
    /// </remarks>
    /// <param name="node">Redis node to evaluate.</param>
    private void ProcessRedisData(RedisNode node)
    {
        // Skip only when no threshold at all is configured, so each alarm kind can be used on its own.
        if (node.AlarmMemoryRate <= 0 && node.AlarmConnections <= 0 && node.AlarmSpeed <= 0 &&
            node.AlarmInputKbps <= 0 && node.AlarmOutputKbps <= 0) return;

        var webhook = RobotHelper.GetAlarm(node.Project, node.Category, node.WebHook);
        if (webhook.IsNullOrEmpty()) return;

        // Latest data record of the Redis node.
        var data = RedisData.FindLast(node.Id);
        if (data == null) return;

        using var span = _tracer?.NewSpan($"alarm:{nameof(RedisNode)}");

        // Each triggered alarm adds a writer that appends its line to the message.
        var actions = new List<Action<StringBuilder>>();

        // Memory alarm. MaxMemory must be positive, otherwise the division yields infinity/NaN and a false alarm.
        if (node.AlarmMemoryRate > 0 && node.MaxMemory > 0)
        {
            var rate = data.UsedMemory * 100d / node.MaxMemory;
            if (rate >= node.AlarmMemoryRate)
            {
                var error2 = _cache.Get<Double>("alarm:RedisMemory:" + node.Id);
                if (error2 == 0 || rate > error2 * 2)
                {
                    _cache.Set("alarm:RedisMemory:" + node.Id, rate, 5 * 60);

                    // Divide by 100d so an integer threshold is not truncated to 0 before percent formatting.
                    actions.Add(sb => sb.AppendLine($">**内存告警:**<font color=\"red\">{rate / 100:p0} >= {node.AlarmMemoryRate / 100d:p0}</font>"));
                }
            }
        }

        // Connection count alarm.
        var cs = data.ConnectedClients;
        if (node.AlarmConnections > 0 && cs >= node.AlarmConnections)
        {
            var error2 = _cache.Get<Int32>("alarm:RedisConnections:" + node.Id);
            if (error2 == 0 || cs > error2 * 2)
            {
                _cache.Set("alarm:RedisConnections:" + node.Id, cs, 5 * 60);

                actions.Add(sb => sb.AppendLine($">**连接数告警:**<font color=\"red\">{cs:n0} >= {node.AlarmConnections:n0}</font>"));
            }
        }

        // Speed alarm.
        var speed = data.Speed;
        if (node.AlarmSpeed > 0 && speed >= node.AlarmSpeed)
        {
            var error2 = _cache.Get<Int32>("alarm:RedisSpeed:" + node.Id);
            if (error2 == 0 || speed > error2 * 2)
            {
                _cache.Set("alarm:RedisSpeed:" + node.Id, speed, 5 * 60);

                actions.Add(sb => sb.AppendLine($">**速度告警:**<font color=\"red\">{speed:n0} >= {node.AlarmSpeed:n0}</font>"));
            }
        }

        // Inbound traffic alarm.
        var input = data.InputKbps;
        if (node.AlarmInputKbps > 0 && input >= node.AlarmInputKbps)
        {
            var error2 = _cache.Get<Double>("alarm:RedisInputKbps:" + node.Id);
            if (error2 == 0 || input > error2 * 2)
            {
                _cache.Set("alarm:RedisInputKbps:" + node.Id, input, 5 * 60);

                actions.Add(sb => sb.AppendLine($">**入流量告警:**<font color=\"red\">{input:n0} >= {node.AlarmInputKbps:n0}</font>"));
            }
        }

        // Outbound traffic alarm.
        var output = data.OutputKbps;
        if (node.AlarmOutputKbps > 0 && output >= node.AlarmOutputKbps)
        {
            var error2 = _cache.Get<Double>("alarm:RedisOutputKbps:" + node.Id);
            if (error2 == 0 || output > error2 * 2)
            {
                _cache.Set("alarm:RedisOutputKbps:" + node.Id, output, 5 * 60);

                actions.Add(sb => sb.AppendLine($">**出流量告警:**<font color=\"red\">{output:n0} >= {node.AlarmOutputKbps:n0}</font>"));
            }
        }

        if (actions.Count > 0)
        {
            var msg = GetMarkdown(node, data, "Redis告警", actions);
            // Send to the webhook resolved above rather than the node's raw WebHook, which may be empty.
            RobotHelper.SendAlarm(node.Category, webhook, "Redis告警", msg);
        }
    }

    /// <summary>Builds the markdown body of a Redis node alarm.</summary>
    /// <param name="node">Redis node the alarm is about; its string form is used in the title.</param>
    /// <param name="data">Data record the alarm is based on.</param>
    /// <param name="title">Title text; the title line is omitted when null or empty.</param>
    /// <param name="actions">Writers that append one line per triggered alarm condition.</param>
    /// <returns>The markdown text, cut at 2000 characters before the optional link is appended.</returns>
    private String GetMarkdown(RedisNode node, RedisData data, String title, IList<Action<StringBuilder>> actions)
    {
        var sb = new StringBuilder();
        if (!title.IsNullOrEmpty()) sb.AppendLine($"### [{node}]{title}");

        // Common header describing the Redis node.
        sb.AppendLine($">**时间:**<font color=\"blue\">{data.CreateTime:yyyy-MM-dd HH:mm:ss}</font>");
        sb.AppendLine($">**分类:**<font color=\"gray\">{node.Category}</font>");
        sb.AppendLine($">**版本:**<font color=\"gray\">{node.Version}</font>");
        sb.AppendLine($">**已用内存:**<font color=\"gray\">{data.UsedMemory:n0}</font>");
        sb.AppendLine($">**内存容量:**<font color=\"gray\">{node.MaxMemory:n0}</font>");
        sb.AppendLine($">**连接数:**<font color=\"gray\">{data.ConnectedClients:n0}</font>");
        sb.AppendLine($">**服务器:**<font color=\"gray\">{node.Server}</font>");

        // One line per triggered condition, supplied by the caller.
        foreach (var append in actions)
        {
            append(sb);
        }

        // Limit the body length; the link is appended afterwards so it is never cut off.
        var text = sb.Length > 2000 ? sb.ToString(0, 2000) : sb.ToString();

        var url = _setting.WebUrl;
        if (url.IsNullOrEmpty()) return text;

        url = url.EnsureEnd("/") + "Nodes/RedisNode?id=" + node.Id;
        return text + Environment.NewLine + $"[更多信息]({url})";
    }
    #endregion

    #region Redis队列告警
    /// <summary>Raises a backlog alarm for each enabled queue of a Redis node whose message count reached its limit.</summary>
    /// <param name="node">Redis node whose message queues are evaluated.</param>
    private void ProcessRedisQueue(RedisNode node)
    {
        using var span = _tracer?.NewSpan($"alarm:{nameof(RedisMessageQueue)}");

        foreach (var queue in RedisMessageQueue.FindAllByRedisId(node.Id))
        {
            // Only enabled queues with a positive limit that has been reached.
            var backlog = queue.Enable && queue.MaxMessages > 0 && queue.Messages >= queue.MaxMessages;
            if (!backlog) continue;

            // Queue-level group and webhook take precedence over the node's.
            var groupName = queue.Category.IsNullOrEmpty() ? node.Category : queue.Category;
            var webhook = queue.WebHook.IsNullOrEmpty() ? node.WebHook : queue.WebHook;

            webhook = RobotHelper.GetAlarm(node.Project, groupName, webhook);
            if (webhook.IsNullOrEmpty()) continue;

            // Suppress repeats for 5 minutes unless the backlog has more than doubled.
            var key = "alarm:RedisMessageQueue:" + queue.Id;
            var last = _cache.Get<Int32>(key);
            if (last != 0 && !(queue.Messages > last * 2)) continue;

            _cache.Set(key, queue.Messages, 5 * 60);

            var msg = GetMarkdown(node, queue, true);
            RobotHelper.SendAlarm(groupName, webhook, "消息队列告警", msg);
        }
    }

    /// <summary>Builds the markdown body of a message-queue backlog alarm.</summary>
    /// <param name="node">Redis node hosting the queue.</param>
    /// <param name="queue">Queue whose backlog reached its limit.</param>
    /// <param name="includeTitle">Whether to emit the markdown title line.</param>
    /// <returns>The markdown text, cut at 2000 characters before the optional link is appended.</returns>
    private String GetMarkdown(RedisNode node, RedisMessageQueue queue, Boolean includeTitle)
    {
        var sb = new StringBuilder();
        if (includeTitle) sb.AppendLine($"### [{queue.Name}/{node}]消息队列告警");
        sb.AppendLine($">**时间:**<font color=\"blue\">{queue.UpdateTime:yyyy-MM-dd HH:mm:ss}</font>");
        sb.AppendLine($">**主题:**<font color=\"gray\">{queue.Topic}</font>");
        // The alarm fires when Messages >= MaxMessages, so the text states the same relation.
        sb.AppendLine($">**积压:**<font color=\"red\">{queue.Messages:n0} >= {queue.MaxMessages:n0}</font>");
        sb.AppendLine($">**消费者:**<font color=\"green\">{queue.Consumers}</font>");
        sb.AppendLine($">**总消费:**<font color=\"green\">{queue.Total:n0}</font>");
        sb.AppendLine($">**服务器:**<font color=\"gray\">{node.Server}</font>");

        var str = sb.ToString();
        if (str.Length > 2000) str = str[..2000];

        // Append the link after truncation so it is never cut off.
        var url = _setting.WebUrl;
        if (!url.IsNullOrEmpty())
        {
            // The queue name is free text; encode it so spaces or reserved characters do not break the query string.
            url = url.EnsureEnd("/") + "Nodes/RedisMessageQueue?redisId=" + queue.RedisId + "&q=" + HttpUtility.UrlEncode(queue.Name);
            str += Environment.NewLine + $"[更多信息]({url})";
        }

        return str;
    }
    #endregion
}