v7.3.2018.0614   重构高性能资源池,减少GC压力,增加线程池,让异步任务得到平等竞争CPU的机会
大石头 authored at 2018-06-14 17:56:44
14.49 KiB
X
using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace XTemplate.Templating
{
    /// <summary>模版分析器</summary>
    internal static class TemplateParser
    {
        #region 分析模版的正则表达式
        // Replacement callback shared by the escape-stripping expressions of this class.
        private static MatchEvaluator escapeReplacingEvaluator;
        // Compiled expression that cuts a template into text, directive, member, expression and statement pieces.
        private static Regex templateParsingRegex;
        // Look-behind fragment; no member of this class references it.
        private const String ma = @"(?<=([^\\]|^)(\\\\)*)";
        // "<#" preceded by the start of input or a non-backslash character, followed by zero or more backslash pairs.
        private const String startTag = @"(?<=([^\\]|^)(\\\\)*)<#";
        // "#>" preceded by a non-backslash character followed by zero or more backslash pairs.
        private const String endTag = @"(?<=[^\\](\\\\)*)#>";

        /// <summary>Creates the shared escape evaluator and compiles the template-splitting expression.</summary>
        static TemplateParser()
        {
            escapeReplacingEvaluator = HalveMatchedRun;

            // The alternatives are tried left to right, so their order decides which kind of block wins.
            var alternatives = new[]
            {
                @"(?<text>^(\\\\)+)(?=<#)",
                startTag + @"@(?<directive>.*?)" + endTag,
                startTag + @"\!(?<member>.*?)" + endTag,
                startTag + @"=(?<expression>.*?)" + endTag,
                startTag + @"(?<statement>.*?)" + endTag,
                @"(?<text>.+?)(?=((?<=[^\\](\\\\)*)<#))",
                "(?<text>.+)(?=$)"
            };
            var pattern = String.Join("|", alternatives);

            templateParsingRegex = new Regex(pattern, RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture);
        }

        /// <summary>Returns the first half (rounded down) of the matched text.</summary>
        /// <param name="match">Match whose value is to be shortened.</param>
        /// <returns>The leading half of the matched value, or an empty string for a failed match.</returns>
        private static String HalveMatchedRun(Match match)
        {
            if (!match.Success || match.Value == null) return String.Empty;

            var matched = match.Value;
            return matched.Substring(0, matched.Length / 2);
        }
        #endregion

        #region 分析模版
        // Finds a "<#" or "#>" marker that is preceded by zero or more backslash pairs (i.e. not escaped).
        private static Regex unescapedTagFindingRegex = new Regex(@"(^|[^\\])(\\\\)*(<\#|\#>)", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture);

        /// <summary>Splits a template into blocks.</summary>
        /// <param name="name">Name stored on every block that is produced.</param>
        /// <param name="content">Template text; null or empty input yields an empty list.</param>
        /// <returns>The blocks in the order they occur in the template, with position information filled in and escape backslashes removed.</returns>
        public static List<Block> Parse(String name, String content)
        {
            var result = new List<Block>();
            if (String.IsNullOrEmpty(content)) return result;

            // Capture-group names and the block type each one stands for, in the order they are probed.
            var groupNames = new[] { "text", "directive", "member", "expression", "statement" };
            var blockTypes = new[] { BlockType.Text, BlockType.Directive, BlockType.Member, BlockType.Expression, BlockType.Statement };

            foreach (Match piece in templateParsingRegex.Matches(content))
            {
                var item = new Block();
                Group captured = null;
                for (var k = 0; k < groupNames.Length; k++)
                {
                    captured = piece.Groups[groupNames[k]];
                    if (!captured.Success) continue;

                    item.Type = blockTypes[k];
                    break;
                }

                // A match in which none of the named groups took part contributes no block.
                if (captured == null || !captured.Success) continue;

                item.Text = captured.Value;
                item.Name = name;
                result.Add(item);
            }

            // Positions are computed on the raw text, before escape backslashes are removed.
            InsertPosition(result);

            // Any tag marker still present inside a block's text was not consumed by the splitter.
            foreach (var candidate in result)
            {
                if (unescapedTagFindingRegex.IsMatch(candidate.Text)) throw new TemplateException(candidate, "不可识别的标记!可能有未编码的字符,比如\\<#。");
            }

            StripEscapeCharacters(result);
            return result;
        }

        // Matches one line break in either "\r\n" or "\n" form. The pattern is fixed rather than taken from
        // Environment.NewLine because a template's line endings need not match those of the host platform.
        private static readonly Regex newlineFindingRegex = new Regex(@"\r\n|\n", RegexOptions.Singleline | RegexOptions.Compiled);

        /// <summary>Fills in the start and end line/column of every block.</summary>
        /// <remarks>
        /// Lines and columns start at 1. The blocks are walked in order while a running position is kept;
        /// the opening and closing tag characters are not part of a block's text, so their widths are added separately.
        /// </remarks>
        /// <param name="blocks">Blocks in template order; their position properties are overwritten.</param>
        private static void InsertPosition(List<Block> blocks)
        {
            var line = 1;
            var column = 1;
            foreach (var block in blocks)
            {
                // Skip the opening tag: "<#!", "<#@" and "<#=" are three characters wide, "<#" is two.
                if (block.Type == BlockType.Member ||
                    block.Type == BlockType.Directive ||
                    block.Type == BlockType.Expression)
                {
                    column += 3;
                }
                else if (block.Type == BlockType.Statement)
                {
                    column += 2;
                }
                block.StartLine = line;
                block.StartColumn = column;

                var lineBreaks = newlineFindingRegex.Matches(block.Text);
                if (lineBreaks.Count > 0)
                {
                    line += lineBreaks.Count;

                    // The column restarts after the last line break; use the length of the break that was
                    // actually matched so that "\r\n" and "\n" are both measured correctly.
                    var lastBreak = lineBreaks[lineBreaks.Count - 1];
                    column = block.Text.Length - (lastBreak.Index + lastBreak.Length) + 1;
                }
                else
                {
                    column += block.Text.Length;
                }
                block.EndLine = line;
                block.EndColumn = column;

                // Every block other than plain text is closed by the two-character "#>".
                if (block.Type != BlockType.Text) column += 2;
            }
        }

        // A run of backslashes that stands directly before "<#" or "#>". The "#" is written as "\#", as in
        // unescapedTagFindingRegex; writing "\\#" would demand a literal backslash next to the "#" and would
        // therefore never match an escaped tag such as "\<#".
        private static readonly Regex escapeFindingRegex = new Regex(@"\\+(?=<\#)|\\+(?=\#>)", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture);
        // A run of backslashes at the end of the text.
        private static readonly Regex eolEscapeFindingRegex = new Regex(@"\\+(?=$)", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture);

        /// <summary>Decodes escaped characters in the text of every block.</summary>
        /// <remarks>
        /// Each run of backslashes in front of a tag marker is replaced through <c>escapeReplacingEvaluator</c>.
        /// Trailing backslash runs are treated the same way, except in the last block.
        /// </remarks>
        /// <param name="blocks">Blocks whose text is rewritten in place.</param>
        private static void StripEscapeCharacters(List<Block> blocks)
        {
            var lastIndex = blocks.Count - 1;
            for (var i = 0; i <= lastIndex; i++)
            {
                var block = blocks[i];
                var text = escapeFindingRegex.Replace(block.Text, escapeReplacingEvaluator);

                // Backslashes at the very end of the final block are left untouched.
                if (i != lastIndex) text = eolEscapeFindingRegex.Replace(text, escapeReplacingEvaluator);

                block.Text = text;
            }
        }

        ///// <summary>
        ///// 检查块顺序是否有问题
        ///// </summary>
        ///// <param name="blocks"></param>
        //private static void CheckBlockSequence(List<Block> blocks)
        //{
        //    Boolean isMemberFeature = false;
        //    foreach (Block block in blocks)
        //    {
        //        if (!isMemberFeature)
        //        {
        //            if (block.Type == BlockType.Member) isMemberFeature = true;
        //        }
        //        else if ((block.Type == BlockType.Directive) || (block.Type == BlockType.Statement))
        //        {
        //            throw new TemplateException(block, "类成员定义后不可以有指令和语句!");
        //        }
        //    }
        //    if (isMemberFeature)
        //    {
        //        Block block2 = blocks[blocks.Count - 1];
        //        if ((block2.Type != BlockType.Member) && ((block2.Type != BlockType.Text) || !allNewlineRegex.Match(block2.Text).Success))
        //        {
        //            throw new TemplateException(block2, "类成员定义后只可以有文本或全是换行的代码语句!");
        //        }
        //    }
        //}
        #endregion

        #region 分析指令
        // A run of backslashes directly before a double quote, or at the end of the value.
        private static Regex directiveEscapeFindingRegex = new Regex("\\\\+(?=\")|\\\\+(?=$)", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture);
        // Either a name="value" pair (groups "pname" and "pvalue") or a bare token (group "name").
        private static Regex directiveParsingRegex = new Regex("(?<pname>\\S+?)\\s*=\\s*\"(?<pvalue>.*?)(?<=[^\\\\](\\\\\\\\)*)\"|(?<name>\\S+)", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture);

        /// <summary>Parses a directive block into its name and parameters.</summary>
        /// <param name="block">Block whose text holds the directive.</param>
        /// <returns>The parsed directive, or null when the text contains no directive name.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="block"/> is null.</exception>
        public static Directive ParseDirectiveBlock(Block block)
        {
            if (block == null) throw new ArgumentNullException("block");

            if (!ValidateDirectiveString(block)) throw new TemplateException(block, "指令格式错误!");

            String directiveName = null;
            // Parameter names are compared without regard to case.
            var parameters = new Dictionary<String, String>(StringComparer.OrdinalIgnoreCase);
            foreach (Match token in directiveParsingRegex.Matches(block.Text))
            {
                // A bare token is the directive name; a later bare token replaces an earlier one.
                var nameGroup = token.Groups["name"];
                if (nameGroup.Success)
                {
                    directiveName = nameGroup.Value;
                    continue;
                }

                var keyGroup = token.Groups["pname"];
                var valueGroup = token.Groups["pvalue"];
                if (!keyGroup.Success || !valueGroup.Success) continue;

                var key = keyGroup.Value;
                if (parameters.ContainsKey(key)) throw new TemplateException(block, String.Format("指令中已存在名为[{0}]的参数!", key));

                // Decode backslash escapes inside the quoted value before storing it.
                var decoded = directiveEscapeFindingRegex.Replace(valueGroup.Value, escapeReplacingEvaluator);
                parameters.Add(key, decoded);
            }

            return directiveName == null ? null : new Directive(directiveName, parameters, block);
        }

        // Leading directive name: optional white space, word characters or dots, then at least one white-space character.
        private static Regex nameValidatingRegex = new Regex(@"^\s*[\w\.]+\s+", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture);
        // One name="value" parameter together with any white space that follows it.
        private static Regex paramValueValidatingRegex = new Regex("[\\w\\.]+\\s*=\\s*\"(.*?)(?<=[^\\\\](\\\\\\\\)*)\"\\s*", RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.ExplicitCapture);

        /// <summary>Checks whether the text of a directive block is well formed.</summary>
        /// <remarks>
        /// The text must consist of a directive name followed by one or more parameters that
        /// follow each other without gaps and reach exactly to the end of the text.
        /// </remarks>
        /// <param name="block">Block whose text is checked.</param>
        /// <returns>True when the text has the expected shape; otherwise false.</returns>
        private static Boolean ValidateDirectiveString(Block block)
        {
            var text = block.Text;

            var nameMatch = nameValidatingRegex.Match(text);
            if (!nameMatch.Success) return false;

            var parameterMatches = paramValueValidatingRegex.Matches(text);
            if (parameterMatches.Count == 0) return false;

            // Offset at which the next parameter has to begin.
            var expectedStart = nameMatch.Length;
            foreach (Match parameter in parameterMatches)
            {
                if (parameter.Index != expectedStart) return false;

                expectedStart += parameter.Length;
            }

            return expectedStart == text.Length;
        }
        #endregion

        #region 优化处理
        // Text that consists of white space only.
        private static Regex allNewlineRegex = new Regex(@"^\s*$", RegexOptions.Singleline | RegexOptions.Compiled);
        // Spaces or tabs at the start of the text followed by one line break.
        private static Regex newlineAtLineStartRegex = new Regex(@"^[ \t]*((\r\n)|\n)", RegexOptions.Singleline | RegexOptions.Compiled);
        // NOTE(review): the pattern opens with a look-ahead for a line break and then requires "[ \t]*$" at that
        // same position, so every match it can produce is zero-length and replacing it leaves the text as it was.
        // Confirm whether a look-behind, or a consumed line break, was intended.
        private static Regex newlineAtLineEndRegex = new Regex(@"(?=(\r\n)|\n)[ \t]*$", RegexOptions.Singleline | RegexOptions.Compiled);

        /// <summary>Removes superfluous line breaks from text blocks.</summary>
        /// <remarks>
        /// The aim is to let templates be written more freely:
        /// 1. when a text block is followed by a statement or member block, the expression (?=(\r\n)|\n)[ \t]*$ is applied to the end of the text;
        /// 2. when a text block is preceded by a statement or member block, leading white space and one line break are dropped: ^[ \t]*((\r\n)|\n)
        /// 3. white space and line breaks between member blocks are dropped: ^\s*$
        /// Blocks left with empty text are removed afterwards, except member blocks.
        /// </remarks>
        /// <param name="blocks">Blocks to clean up; the list and the text of its blocks are modified in place.</param>
        internal static void StripExtraNewlines(List<Block> blocks)
        {
            var lastIndex = blocks.Count - 1;
            for (var i = 0; i <= lastIndex; i++)
            {
                var current = blocks[i];
                if (current.Type != BlockType.Text) continue;

                var hasNext = i < lastIndex;
                if (i > 0)
                {
                    var previousType = blocks[i - 1].Type;

                    // The preceding block is neither an expression nor text: drop one leading line break.
                    if (previousType != BlockType.Expression && previousType != BlockType.Text)
                    {
                        current.Text = newlineAtLineStartRegex.Replace(current.Text, String.Empty);
                    }

                    // Text that follows a member block and is either the last block or followed by another
                    // member block is emptied when it holds nothing but white space.
                    if (previousType == BlockType.Member && (!hasNext || blocks[i + 1].Type == BlockType.Member))
                    {
                        current.Text = allNewlineRegex.Replace(current.Text, String.Empty);
                    }
                }

                if (hasNext)
                {
                    var nextType = blocks[i + 1].Type;

                    // The following block is neither an expression nor text: apply the line-end expression.
                    if (nextType != BlockType.Expression && nextType != BlockType.Text)
                    {
                        current.Text = newlineAtLineEndRegex.Replace(current.Text, String.Empty);
                    }
                }
            }

            // Remove blocks whose text is empty; member blocks are kept even when they are empty.
            blocks.RemoveAll(b => b.Type != BlockType.Member && String.IsNullOrEmpty(b.Text));
        }
        #endregion
    }
}