v9.8.2018.0605   由DataReader直接映射实体列表,以支持netstandard的MySql和SQLite,且提升性能
大石头 编写于 2018-06-05 00:45:23
X
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace NewLife.Web
{
    /// <summary>超链接</summary>
    public class Link
    {
        #region 属性
        /// <summary>名称</summary>
        public String Name { get; set; }

        /// <summary>超链接</summary>
        public String Url { get; set; }

        /// <summary>原始超链接</summary>
        public String RawUrl { get; set; }

        /// <summary>标题</summary>
        public String Title { get; set; }

        /// <summary>版本</summary>
        public Version Version { get; set; }

        /// <summary>时间</summary>
        public DateTime Time { get; set; }

        /// <summary>原始Html</summary>
        public String Html { get; set; }
        #endregion

        #region 方法
        static Regex _regA = new Regex("<a(?<其它1>[^>]*) href=?\"(?<链接>[^>\"]*)?\"(?<其它2>[^>]*)>(?<名称>[^<]*)</a>", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);
        static Regex _regTitle = new Regex("title=(\"?)(?<标题>[^ \']*?)\\1", RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);

        /// <summary>分析HTML中的链接</summary>
        /// <param name="html">Html文本</param>
        /// <param name="baseurl">基础Url,用于生成超链接的完整Url</param>
        /// <param name="filter">用于基础过滤的过滤器</param>
        /// <returns></returns>
        public static Link[] Parse(String html, String baseurl = null, Func<Link, Boolean> filter = null)
        {
            if (baseurl.StartsWithIgnoreCase("ftp://")) return ParseFTP(html, baseurl, filter);

            // 分析所有链接
            var list = new List<Link>();
            var buri = new Uri(baseurl);
            foreach (Match item in _regA.Matches(html))
            {
                var link = new Link
                {
                    Html = item.Value,
                    Name = item.Groups["名称"].Value.Trim(),
                    Url = item.Groups["链接"].Value.Trim()
                };
                link.RawUrl = link.Url;

                // 过滤器
                if (filter != null && !filter(link)) continue;

                link.Url = link.Url.TrimStart("#");
                if (String.IsNullOrEmpty(link.Url)) continue;

                if (link.Url.StartsWithIgnoreCase("javascript:")) continue;

                // 分析title
                var txt = item.Groups["其它1"].Value.Trim();
                if (txt.IsNullOrWhiteSpace() || !_regTitle.IsMatch(txt)) txt = item.Groups["其它2"].Value.Trim();
                var mc = _regTitle.Match(txt);
                if (mc.Success)
                {
                    link.Title = mc.Groups["标题"].Value.Trim();
                }

                // 完善下载地址
                var uri = new Uri(buri, link.RawUrl);
                link.Url = uri.ToString();

                // 从github.com下载需要处理Url
                if (link.Url.Contains("github.com") && link.Url.Contains("/blob/")) link.Url = link.Url.Replace("/blob/", "/raw/");

                // 分割名称,计算结尾的时间 yyyyMMddHHmmss
                link.ParseTime();

                // 分割版本,_v1.0.0.0
                link.ParseVersion();

                list.Add(link);
            }

            return list.ToArray();
        }

        private static Link[] ParseFTP(String html, String url, Func<Link, Boolean> filter = null)
        {
            var list = new List<Link>();

            var ns = html.Split(Environment.NewLine);
            if (ns.Length == 0) return list.ToArray();

            // 如果由很多段组成,可能是unix格式
            var unix = ns[0].Split(" ").Length >= 6;
            var buri = new Uri(url);
            foreach (var item in ns)
            {
                var link = new Link
                {
                    Name = item
                };
                //link.Name = Path.GetFileNameWithoutExtension(item);
                //link.Url = new Uri(buri, item).ToString();
                //link.RawUrl = link.Url;

                // 过滤器
                if (filter != null && !filter(link)) continue;

                // 分析title
                link.Title = Path.GetFileNameWithoutExtension(item);

                // 完善下载地址
                var uri = new Uri(buri, item);
                link.Url = uri.ToString();

                // 分割名称,计算结尾的时间 yyyyMMddHHmmss
                var idx = link.ParseTime();
                if (idx > 0) link.Title = link.Title.Substring(0, idx);

                // 分割版本,_v1.0.0.0
                idx = link.ParseVersion();
                if (idx > 0) link.Title = link.Title.Substring(0, idx);

                list.Add(link);
            }

            return list.ToArray();
        }

        Int32 ParseTime()
        {
            // 分割名称,计算结尾的时间 yyyyMMddHHmmss
            var p = Name.LastIndexOf("_");
            if (p <= 0) return -1;

            var ts = Name.Substring(p + 1);
            if (ts.StartsWith("20") && ts.Length >= 4 + 2 + 2 + 2 + 2 + 2)
            {
                Time = new DateTime(
                    ts.Substring(0, 4).ToInt(),
                    ts.Substring(4, 2).ToInt(),
                    ts.Substring(6, 2).ToInt(),
                    ts.Substring(8, 2).ToInt(),
                    ts.Substring(10, 2).ToInt(),
                    ts.Substring(12, 2).ToInt());
            }

            return p;
        }

        Int32 ParseVersion()
        {
            // 分割版本,_v1.0.0.0
            var vs = Name.CutStart("_v", "_V", " v", " V");
            if (vs == Name)
            {
                // 也可能没有v,但是这是必须有圆点
                if (Name.Contains(".") && (Name.Contains(" ") || Name.Contains("_")))
                {
                    vs = Name.CutStart(" ", "_");
                    if (!Name.Contains(".")) return -1;
                }
            }
            if (vs == Name) return -1;

            // 返回位置
            var p = Name.LastIndexOf(vs) - 2;

            // 尾部截断
            vs = vs.CutEnd(" ", "_", "-");
            // 有可能只有_v1,而没有子版本
            var ss = vs.SplitAsInt(".");
            if (ss.Length > 0)
            {
                switch (ss.Length)
                {
                    case 1:
                        Version = new Version(ss[0], 0);
                        break;
                    case 2:
                        Version = new Version(ss[0], ss[1]);
                        break;
                    case 3:
                        Version = new Version(ss[0], ss[1], ss[2]);
                        break;
                    case 4:
                        Version = new Version(ss[0], ss[1], ss[2], ss[3]);
                        break;
                    default:
                        break;
                }
            }

            // 返回位置
            return p;
        }

        /// <summary>已重载。</summary>
        /// <returns></returns>
        public override String ToString()
        {
            //return base.ToString();
            //return "{0} {1} {2} {3}".F(Name, RawUrl, Version, Time);
            var sb = new StringBuilder();
            sb.AppendFormat("{0} {1}", Name, RawUrl);
            if (Version != null) sb.AppendFormat(" v{0}", Version);
            if (Time > DateTime.MinValue) sb.AppendFormat(" {0}", Time.ToFullString());

            return sb.ToString();
        }
        #endregion
    }
}