From b8c23087d864d5f8ffcbf4f379bd0fb86b454d46 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jean-Philippe=20Bruy=C3=A8re?= Date: Thu, 3 Jul 2025 22:47:27 +0200 Subject: [PATCH] xml parsing debug --- CrowEditBase/src/Compiler/SourceDocument.cs | 2 +- CrowEditBase/src/Compiler/SyntaxAnalyser.cs | 13 ++--- .../src/Parsing/IML/ImlDocument.cs | 2 +- .../src/Parsing/IML/ImlSyntaxAnalyser.cs | 2 +- .../src/Parsing/Styling/StyleDocument.cs | 2 +- .../Parsing/Styling/StyleSyntaxAnalyser.cs | 7 ++- .../CEEbnfPlugin/src/Parsing/EbnfDocument.cs | 2 +- .../src/Parsing/EbnfSyntaxAnalyser.cs | 7 ++- plugins/CERoslynPlugin/src/CSDocument.cs | 31 +----------- plugins/CERoslynPlugin/src/CSTokenizer.cs | 4 +- .../src/Parsing/CSSyntaxAnalyser.cs | 11 ++--- .../CEXmlPlugin/src/Parsing/XmlDocument.cs | 2 +- .../src/Parsing/XmlSyntaxAnalyser.cs | 48 +++++++++---------- .../CEXmlPlugin/src/Parsing/XmlSyntaxNodes.cs | 3 +- .../CEXmlPlugin/src/Parsing/XmlTokenType.cs | 28 +++++------ 15 files changed, 66 insertions(+), 98 deletions(-) diff --git a/CrowEditBase/src/Compiler/SourceDocument.cs b/CrowEditBase/src/Compiler/SourceDocument.cs index 1c5f98f..cee7da6 100644 --- a/CrowEditBase/src/Compiler/SourceDocument.cs +++ b/CrowEditBase/src/Compiler/SourceDocument.cs @@ -144,7 +144,6 @@ namespace CrowEditBase } public virtual string GetTokenTypeString (TokenType tokenType) => tokenType.ToString(); //protected abstract Tokenizer CreateTokenizer (); - protected abstract SyntaxAnalyser CreateSyntaxAnalyser (); public abstract IList GetSuggestions (int absoluteTextPos, int currentTokenIndex, SyntaxNode currentNode, CharLocation loc); protected virtual async void parse () { if (backgroundCompilationTask != null && !backgroundCompilationTask.IsCompleted) { @@ -159,6 +158,7 @@ namespace CrowEditBase //CrowEditBase.App.Log (LogType.Low, $"Syntax Analysis done in {sw.ElapsedMilliseconds}(ms) {sw.ElapsedTicks}(ticks)"); } + protected abstract SyntaxAnalyser CreateSyntaxAnalyser (); async void parseAssync(CancellationToken cancel) { SyntaxAnalyser syntaxAnalyser = CreateSyntaxAnalyser (); diff --git a/CrowEditBase/src/Compiler/SyntaxAnalyser.cs b/CrowEditBase/src/Compiler/SyntaxAnalyser.cs index 33392d5..9496f9f 100644 --- a/CrowEditBase/src/Compiler/SyntaxAnalyser.cs +++ b/CrowEditBase/src/Compiler/SyntaxAnalyser.cs @@ -3,21 +3,22 @@ // This code is licensed under the MIT license (MIT) (http://opensource.org/licenses/MIT) using System; using System.Collections.Generic; -using System.Linq; using System.Threading; using System.Threading.Tasks; -using Crow.Text; namespace CrowEditBase { public abstract class SyntaxAnalyser { - protected SourceDocument document; + #region CTOR + public SyntaxAnalyser (ReadOnlyTextBuffer source) { + this.source = source; + } + #endregion + + protected ReadOnlyTextBuffer source; protected SyntaxRootNode Root; protected CancellationToken cancel; public IEnumerable Exceptions => null;// Root?.GetAllExceptions(); - public SyntaxAnalyser (SourceDocument document) { - this.document = document; - } public abstract Task Process (CancellationToken cancel = default); #region Token handling diff --git a/plugins/CECrowPlugin/src/Parsing/IML/ImlDocument.cs b/plugins/CECrowPlugin/src/Parsing/IML/ImlDocument.cs index 1c2c9d8..f4c42a3 100644 --- a/plugins/CECrowPlugin/src/Parsing/IML/ImlDocument.cs +++ b/plugins/CECrowPlugin/src/Parsing/IML/ImlDocument.cs @@ -28,7 +28,7 @@ namespace CECrowPlugin if (msbp.IsCrowProject) }*/ } - protected override SyntaxAnalyser CreateSyntaxAnalyser() => new ImlSyntaxAnalyser (this); + protected override SyntaxAnalyser CreateSyntaxAnalyser() => new ImlSyntaxAnalyser (ImmutableBufferCopy); public override string GetTokenTypeString (TokenType tokenType) => ((ImlTokenType)tokenType).ToString(); diff --git a/plugins/CECrowPlugin/src/Parsing/IML/ImlSyntaxAnalyser.cs b/plugins/CECrowPlugin/src/Parsing/IML/ImlSyntaxAnalyser.cs index 196f0bf..5c1fb3d 100644 --- a/plugins/CECrowPlugin/src/Parsing/IML/ImlSyntaxAnalyser.cs +++ b/plugins/CECrowPlugin/src/Parsing/IML/ImlSyntaxAnalyser.cs @@ -12,7 +12,7 @@ using System.Threading; namespace CECrowPlugin { public class ImlSyntaxAnalyser : XmlSyntaxAnalyser { - public ImlSyntaxAnalyser (ImlDocument document) : base (document) {} + public ImlSyntaxAnalyser (ReadOnlyTextBuffer document) : base (document) {} public override async Task Process (CancellationToken cancel = default) { diff --git a/plugins/CECrowPlugin/src/Parsing/Styling/StyleDocument.cs b/plugins/CECrowPlugin/src/Parsing/Styling/StyleDocument.cs index 31a3b8a..98c9553 100644 --- a/plugins/CECrowPlugin/src/Parsing/Styling/StyleDocument.cs +++ b/plugins/CECrowPlugin/src/Parsing/Styling/StyleDocument.cs @@ -22,7 +22,7 @@ namespace CECrowPlugin.Style }*/ } - protected override SyntaxAnalyser CreateSyntaxAnalyser() => new StyleSyntaxAnalyser (this); + protected override SyntaxAnalyser CreateSyntaxAnalyser() => new StyleSyntaxAnalyser (ImmutableBufferCopy); public override IList GetSuggestions (int absoluteTextPos, int currentTokenIndex, SyntaxNode CurrentNode, CharLocation loc) { Token currentToken = GetTokenByIndex(currentTokenIndex); diff --git a/plugins/CECrowPlugin/src/Parsing/Styling/StyleSyntaxAnalyser.cs b/plugins/CECrowPlugin/src/Parsing/Styling/StyleSyntaxAnalyser.cs index 24f15d5..2fce5e2 100644 --- a/plugins/CECrowPlugin/src/Parsing/Styling/StyleSyntaxAnalyser.cs +++ b/plugins/CECrowPlugin/src/Parsing/Styling/StyleSyntaxAnalyser.cs @@ -22,7 +22,7 @@ namespace CECrowPlugin.Style public static bool Is(this Token tok, StyleTokenType type) => (StyleTokenType)tok.Type == type; } public class StyleSyntaxAnalyser : SyntaxAnalyser { - public StyleSyntaxAnalyser (StyleDocument document) : base (document) {} + public StyleSyntaxAnalyser (ReadOnlyTextBuffer document) : base (document) {} bool skipTriviaAndComments(MultiNodeSyntax currentNode) { while (tryPeekFlag(out Token token, TokenType.Trivia)) { @@ -153,12 +153,11 @@ namespace CECrowPlugin.Style public override async Task Process (CancellationToken cancel = default) { Tokenizer tokenizer = new StyleTokenizer(); - ReadOnlyTextBuffer buff = document.ImmutableBufferCopy; - Token[] tokens = tokenizer.Tokenize(buff.Source.Span); + Token[] tokens = tokenizer.Tokenize(source.Source.Span); tokIdx = 0; this.cancel = cancel; - Root = new StyleRootSyntax (buff, tokens); + Root = new StyleRootSyntax (source, tokens); while (!EOF) { if (cancel.IsCancellationRequested) break; diff --git a/plugins/CEEbnfPlugin/src/Parsing/EbnfDocument.cs b/plugins/CEEbnfPlugin/src/Parsing/EbnfDocument.cs index cd1da2f..010a6ff 100644 --- a/plugins/CEEbnfPlugin/src/Parsing/EbnfDocument.cs +++ b/plugins/CEEbnfPlugin/src/Parsing/EbnfDocument.cs @@ -15,7 +15,7 @@ namespace CrowEdit.Ebnf public EbnfDocument (string fullPath, string editorPath) : base (fullPath, editorPath) { } - protected override SyntaxAnalyser CreateSyntaxAnalyser() => new EbnfSyntaxAnalyser (this); + protected override SyntaxAnalyser CreateSyntaxAnalyser() => new EbnfSyntaxAnalyser (ImmutableBufferCopy); public override string GetTokenTypeString (TokenType tokenType) => ((EbnfTokenType)tokenType).ToString(); public override IList GetSuggestions (int absoluteTextPos, int currentTokenIndex, SyntaxNode CurrentNode, CharLocation loc) { diff --git a/plugins/CEEbnfPlugin/src/Parsing/EbnfSyntaxAnalyser.cs b/plugins/CEEbnfPlugin/src/Parsing/EbnfSyntaxAnalyser.cs index 26d3c66..1d66375 100644 --- a/plugins/CEEbnfPlugin/src/Parsing/EbnfSyntaxAnalyser.cs +++ b/plugins/CEEbnfPlugin/src/Parsing/EbnfSyntaxAnalyser.cs @@ -22,7 +22,7 @@ namespace CrowEdit.Ebnf } public class EbnfSyntaxAnalyser : SyntaxAnalyser { - public EbnfSyntaxAnalyser (EbnfDocument document) : base (document) {} + public EbnfSyntaxAnalyser (ReadOnlyTextBuffer document) : base (document) {} bool skipTriviaAndComments(MultiNodeSyntax currentNode) { @@ -104,13 +104,12 @@ namespace CrowEdit.Ebnf public override async Task Process(CancellationToken cancel = default) { Tokenizer tokenizer = new EbnfTokenizer(); - ReadOnlyTextBuffer buff = document.ImmutableBufferCopy; - Token[] tokens = tokenizer.Tokenize(buff.Source.Span); + Token[] tokens = tokenizer.Tokenize(source.Source.Span); tokIdx = 0; this.cancel = cancel; - Root = new EbnfRootSyntax (buff, tokens); + Root = new EbnfRootSyntax (source, tokens); /*while (!EOF) { diff --git a/plugins/CERoslynPlugin/src/CSDocument.cs b/plugins/CERoslynPlugin/src/CSDocument.cs index d38afb9..bf0bbcb 100644 --- a/plugins/CERoslynPlugin/src/CSDocument.cs +++ b/plugins/CERoslynPlugin/src/CSDocument.cs @@ -34,14 +34,10 @@ namespace CERoslynPlugin App.GetService ()?.Start (); } - internal CSharpSyntaxTree tree; - public CSDocument (string fullPath, string editorPath) : base (fullPath, editorPath) { - - tree = (CSharpSyntaxTree)CSharpSyntaxTree.ParseText (source.ToString(), CSharpParseOptions.Default); - } + public CSDocument (string fullPath, string editorPath) : base (fullPath, editorPath) { } #region SourceDocument abstract class implementation - protected override SyntaxAnalyser CreateSyntaxAnalyser() => new CSSyntaxAnalyser (this); + protected override SyntaxAnalyser CreateSyntaxAnalyser() => new CSSyntaxAnalyser (ImmutableBufferCopy); public override IList GetSuggestions (int absoluteTextPos, int currentTokenIndex, SyntaxNode CurrentNode, CharLocation loc) { @@ -51,24 +47,6 @@ namespace CERoslynPlugin } #endregion - /*public override Color GetColorForToken (TokenType tokType) { - uint rawkind = (uint)tokType; - uint tokCat = rawkind & 0xFF; - CSTokenType cat = (CSTokenType)tokCat; - - SyntaxKind k = (SyntaxKind)tokType; - - //Console.WriteLine($"{k,50} {(((uint)tokType) ).ToString("B16") } {cat}"); - - switch (cat) { - case CSTokenType.Trivia: - return Colors.Grey; - case CSTokenType.Keyword: - return Colors.DarkSlateBlue; - default: - return Colors.Black; - } - }*/ public override string GetTokenTypeString (TokenType tokenType) => ((SyntaxKind)tokenType).ToString(); public override Color GetColorForToken(Token token) { @@ -108,10 +86,5 @@ namespace CERoslynPlugin parse(); } - protected override void parse() - { - tree = (CSharpSyntaxTree)CSharpSyntaxTree.ParseText (source.ToString(), CSharpParseOptions.Default, "", null); - base.parse(); - } } } \ No newline at end of file diff --git a/plugins/CERoslynPlugin/src/CSTokenizer.cs b/plugins/CERoslynPlugin/src/CSTokenizer.cs index e90d6ec..9076639 100644 --- a/plugins/CERoslynPlugin/src/CSTokenizer.cs +++ b/plugins/CERoslynPlugin/src/CSTokenizer.cs @@ -124,7 +124,7 @@ namespace CERoslynPlugin } CSTokenType convertTokenType(SyntaxKind kind) { return (CSTokenType)kind; - switch (kind) { + /*switch (kind) { case SyntaxKind.None: return CSTokenType.Unknown; case SyntaxKind.List: @@ -1117,7 +1117,7 @@ namespace CERoslynPlugin return CSTokenType.Unknown; default: return CSTokenType.Unknown; - } + }*/ } } } \ No newline at end of file diff --git a/plugins/CERoslynPlugin/src/Parsing/CSSyntaxAnalyser.cs b/plugins/CERoslynPlugin/src/Parsing/CSSyntaxAnalyser.cs index e797873..41698cb 100644 --- a/plugins/CERoslynPlugin/src/Parsing/CSSyntaxAnalyser.cs +++ b/plugins/CERoslynPlugin/src/Parsing/CSSyntaxAnalyser.cs @@ -9,15 +9,12 @@ using Microsoft.CodeAnalysis.CSharp; namespace CERoslynPlugin { public class CSSyntaxAnalyser : SyntaxAnalyser { - CSDocument csdoc; - public CSSyntaxAnalyser (CSDocument document) : base (document) { - csdoc = document; - } + public CSSyntaxAnalyser (ReadOnlyTextBuffer document) : base (document) { } public override async Task Process (CancellationToken cancel = default) { - ReadOnlyTextBuffer buff = document.ImmutableBufferCopy; - CsharpSyntaxWalkerBridge bridge = new CsharpSyntaxWalkerBridge(new CSRootSyntax (buff), cancel); - CSharpSyntaxNode csroot = await csdoc.tree.GetRootAsync(cancel); + CSharpSyntaxTree tree = (CSharpSyntaxTree)CSharpSyntaxTree.ParseText (source.Source.Span.ToString(), CSharpParseOptions.Default, "", null); + CsharpSyntaxWalkerBridge bridge = new CsharpSyntaxWalkerBridge(new CSRootSyntax (source), cancel); + CSharpSyntaxNode csroot = await tree.GetRootAsync(cancel); if (cancel.IsCancellationRequested) return null; diff --git a/plugins/CEXmlPlugin/src/Parsing/XmlDocument.cs b/plugins/CEXmlPlugin/src/Parsing/XmlDocument.cs index b05abb0..38d70f7 100644 --- a/plugins/CEXmlPlugin/src/Parsing/XmlDocument.cs +++ b/plugins/CEXmlPlugin/src/Parsing/XmlDocument.cs @@ -16,7 +16,7 @@ namespace CrowEdit.Xml public class XmlDocument : SourceDocument { public XmlDocument (string fullPath, string editorPath) : base (fullPath, editorPath) { } - protected override SyntaxAnalyser CreateSyntaxAnalyser() => new XmlSyntaxAnalyser (this); + protected override SyntaxAnalyser CreateSyntaxAnalyser() => new XmlSyntaxAnalyser (ImmutableBufferCopy); public override string GetTokenTypeString (TokenType tokenType) => ((XmlTokenType)tokenType).ToString(); protected virtual IEnumerable getElementNameSuggestions(string curName, TextChange change) => null; diff --git a/plugins/CEXmlPlugin/src/Parsing/XmlSyntaxAnalyser.cs b/plugins/CEXmlPlugin/src/Parsing/XmlSyntaxAnalyser.cs index 419ba8d..96bb444 100644 --- a/plugins/CEXmlPlugin/src/Parsing/XmlSyntaxAnalyser.cs +++ b/plugins/CEXmlPlugin/src/Parsing/XmlSyntaxAnalyser.cs @@ -21,7 +21,7 @@ namespace CrowEdit.Xml public static bool Is(this Token tok, XmlTokenType type) => (XmlTokenType)tok.Type == type; } public class XmlSyntaxAnalyser : SyntaxAnalyser { - public XmlSyntaxAnalyser (XmlDocument document) : base (document) {} + public XmlSyntaxAnalyser (ReadOnlyTextBuffer document) : base (document) {} bool skipTriviaAndComments(MultiNodeSyntax currentNode, bool skipLineBreaks = true) { while (tryPeekFlag(out Token token, TokenType.Trivia)) { switch(token.GetTokenType()) { @@ -96,7 +96,25 @@ namespace CrowEdit.Xml } return pi; } - + ElementSyntax processElement(ElementSyntax elt) { + while (!EOF) { + if (cancel.IsCancellationRequested) + break; + if (!skipTriviaAndComments(elt)) + break; + if (Peek().Is(XmlTokenType.ElementOpen)) { + processElementNode(elt); + } else if (Peek().Is(XmlTokenType.EndElementOpen)) { + elt.AddChild(processNode(new ElementEndTagSyntax(Read()))); + break; + } else if (Peek().Is(XmlTokenType.PI_Start)) { + elt.AddChild(processNode(new ProcessingInstructionSyntax(Read()))); + } else { + elt.AddChild(new UnexpectedTokenSyntax(Read())); + } + } + return elt; + } void processElementNode(MultiNodeSyntax node) { ElementStartTagSyntax start = new ElementStartTagSyntax(Read()); if (accept (start, XmlTokenType.ElementName)) { @@ -118,34 +136,14 @@ namespace CrowEdit.Xml start.AddChild(new UnexpectedTokenSyntax(Read())); node.AddChild(new ElementSyntax(start)); } - } - ElementSyntax processElement(ElementSyntax elt) { - while (!EOF) { - if (cancel.IsCancellationRequested) - break; - if (!skipTriviaAndComments(elt)) - break; - if (Peek().Is(XmlTokenType.ElementOpen)) { - processElementNode(elt); - } else if (Peek().Is(XmlTokenType.EndElementOpen)) { - elt.AddChild(processNode(new ElementEndTagSyntax(Read()))); - break; - } else if (Peek().Is(XmlTokenType.PI_Start)) { - elt.AddChild(processNode(new ProcessingInstructionSyntax(Read()))); - } else { - elt.AddChild(new UnexpectedTokenSyntax(Read())); - } - } - return elt; - } + } public override async Task Process (CancellationToken cancel = default) { Tokenizer tokenizer = new XmlTokenizer(); - ReadOnlyTextBuffer buff = document.ImmutableBufferCopy; - Token[] tokens = tokenizer.Tokenize(buff.Source.Span); + Token[] tokens = tokenizer.Tokenize(source.Source.Span); tokIdx = 0; this.cancel = cancel;//? - Root = new XMLRootSyntax (buff, tokens); + Root = new XMLRootSyntax (source, tokens); while (!EOF) { if (cancel.IsCancellationRequested) break; diff --git a/plugins/CEXmlPlugin/src/Parsing/XmlSyntaxNodes.cs b/plugins/CEXmlPlugin/src/Parsing/XmlSyntaxNodes.cs index 4fc91f1..7e870c2 100644 --- a/plugins/CEXmlPlugin/src/Parsing/XmlSyntaxNodes.cs +++ b/plugins/CEXmlPlugin/src/Parsing/XmlSyntaxNodes.cs @@ -42,7 +42,8 @@ namespace CrowEdit.Xml public class EmptyElementSyntax : MultiNodeSyntax { public EmptyElementSyntax (ElementStartTagSyntax startNode) { - AddChild (startNode); + foreach (var child in startNode.Children) + AddChild(child); } //public override bool IsComplete => base.IsComplete && StartTag != null; } diff --git a/plugins/CEXmlPlugin/src/Parsing/XmlTokenType.cs b/plugins/CEXmlPlugin/src/Parsing/XmlTokenType.cs index 18d4c49..4cbd244 100644 --- a/plugins/CEXmlPlugin/src/Parsing/XmlTokenType.cs +++ b/plugins/CEXmlPlugin/src/Parsing/XmlTokenType.cs @@ -18,24 +18,24 @@ namespace CrowEdit.Xml BlockComment = 0x0105, BlockCommentEnd = 0x0106, Name = 0x0200, - ElementName = 0x8201, - AttributeName = 0x8202, - PI_Target = 0x8203, + ElementName = 0x0201, + AttributeName = 0x0202, + PI_Target = 0x0203, + ConstantName = 0x0204, Punctuation = 0x0400, - - PI_Start = 0x8401,// '' - ElementOpen = 0x8403,// '<' - EndElementOpen = 0x8404,// '' - ClosingSign = 0x8406,// '>' - DTDObjectOpen = 0x84A0,// '' + ElementOpen = 0x0403,// '<' + EndElementOpen = 0x0404,// '' + ClosingSign = 0x0406,// '>' + DTDObjectOpen = 0x04A0,// '