From c730a58f064ae82692c826c8933f841e63087c63 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jean-Philippe=20Bruy=C3=A8re?= Date: Wed, 30 Aug 2017 06:02:29 +0200 Subject: [PATCH] Basic xml parser --- CrowEdit.csproj | 4 +- src/CSharpParser.cs | 40 ++++++++ src/CodeTextBuffer.cs | 86 ++++++++-------- src/CrowEdit.cs | 4 +- src/Parser.cs | 158 +++++++++++++++++++++++++++++ src/SourceEditor.cs | 98 ++++++------------ src/SourceLine.cs | 156 ---------------------------- src/Token.cs | 63 +++++++----- src/XMLParser.cs | 231 ++++++++++++++++++++++++++++++++++++++++++ ui/main.crow | 10 +- 10 files changed, 550 insertions(+), 300 deletions(-) create mode 100644 src/CSharpParser.cs create mode 100644 src/Parser.cs delete mode 100644 src/SourceLine.cs create mode 100644 src/XMLParser.cs diff --git a/CrowEdit.csproj b/CrowEdit.csproj index e3340b4..39b204f 100644 --- a/CrowEdit.csproj +++ b/CrowEdit.csproj @@ -68,7 +68,6 @@ - @@ -78,6 +77,9 @@ + + + diff --git a/src/CSharpParser.cs b/src/CSharpParser.cs new file mode 100644 index 0000000..25acf4c --- /dev/null +++ b/src/CSharpParser.cs @@ -0,0 +1,40 @@ +using System; +using Crow; + +namespace CrowEdit +{ + public class CSharpParser : Parser + { + public new enum TokenType { + Unknown = Parser.TokenType.Unknown, + WhiteSpace = Parser.TokenType.WhiteSpace, + LineComment = Parser.TokenType.LineComment, + BlockComment = Parser.TokenType.BlockComment, + OpenParenth, + CloseParenth, + OpenBlock, + CloseBlock, + StatementEnding, + UnaryOp, + BinaryOp, + Affectation, + StringLiteral, + CharacterLiteral, + DigitalLiteral, + Literal, + Identifier, + Indexer, + Type, + Preprocessor, + } + + public CSharpParser (CodeTextBuffer _buffer) : base(_buffer) + { + } + public override void Parse () + { + throw new NotImplementedException (); + } + } +} + diff --git a/src/CodeTextBuffer.cs b/src/CodeTextBuffer.cs index 988b9c3..f82e93f 100644 --- a/src/CodeTextBuffer.cs +++ b/src/CodeTextBuffer.cs @@ -25,64 +25,70 @@ using System.Text.RegularExpressions; namespace Crow { - public class CodeTextBuffer : List + public class CodeTextBuffer : List { - public CodeTextBuffer () : base() - { - } - public int longestLineIdx = 0; - public int longestLineCharCount = 0; + + #region CTOR + public CodeTextBuffer () : base(){} public CodeTextBuffer (string rawSource) : this (){ if (string.IsNullOrEmpty (rawSource)) return; - string[] lines = Regex.Split (rawSource, "\r\n|\r|\n|\\\\n"); - for (int i = 0; i < lines.Length; i++) { - if (lines [i].Length > longestLineCharCount) { - longestLineCharCount = lines [i].Length; + + this.AddRange (Regex.Split (rawSource, "\r\n|\r|\n|\\\\n")); + + lineBreak = detectLineBreakKind (rawSource); + findLongestLine (); + } + #endregion + + string lineBreak = Interface.LineBreak; + + public int longestLineIdx = 0; + public int longestLineCharCount = 0; + + void findLongestLine(){ + longestLineCharCount = 0; + for (int i = 0; i < this.Count; i++) { + if (this[i].Length > longestLineCharCount) { + longestLineCharCount = this[i].Length; longestLineIdx = i; } - this.Add (new SourceLine ( lines [i] )); } } + /// line break could be '\r' or '\n' or '\r\n' + static string detectLineBreakKind(string buffer){ + string strLB = ""; + if (string.IsNullOrEmpty(buffer)) + return Interface.LineBreak; + int i = 0; + while ( i < buffer.Length) { + if (buffer [i] == '\r') { + strLB += '\r'; + i++; + } + if (i < buffer.Length) { + if (buffer [i] == '\r') + return "\r"; + if (buffer[i] == '\n') + strLB += '\n'; + } + if (!string.IsNullOrEmpty (strLB)) + return strLB; + i++; + } + return Interface.LineBreak; + } /// /// return all lines with linebreaks /// public string FullText{ get { - string tmp = ""; - foreach (SourceLine sl in this) - tmp += sl.RawText + Interface.LineBreak; - return tmp; - } - } - - public void Tokenize (int lineIndex) { - //handle multiline block comments - if (lineIndex > 0){ - if (this [lineIndex - 1].Tokens?.LastOrDefault ().Type == TokenType.BlockComment) - this [lineIndex].PresetCurrentToken (TokenType.BlockComment); + return this.Count > 0 ? this.Aggregate((i, j) => i + this.lineBreak + j) : ""; } - this [lineIndex].Tokenize (); - } - - public void InsertLine(int index, SourceLine line){ - base.Insert (index, line); } - public void RemoveLine (int index) { - base.RemoveAt (index); - } - - //public void Tokenize (int lineIndex) { - // //handle multiline block comments - // if (lineIndex > 0){ - // if (this [lineIndex - 1].Tokens?.LastOrDefault ().Type == TokenType.BlockComment) - // this [lineIndex].PresetCurrentToken (TokenType.BlockComment); - // } - // this [lineIndex].Tokenize (); - //} } } diff --git a/src/CrowEdit.cs b/src/CrowEdit.cs index ea9ef6b..d23541b 100644 --- a/src/CrowEdit.cs +++ b/src/CrowEdit.cs @@ -122,7 +122,7 @@ namespace CrowEdit NotifyValueChanged ("IsDirty", IsDirty); if (redoStack.Count == 0) - CMDRedo.CanExecute = false; + CMDRedo.CanExecute = false; } void openFileDialog(){ Load ("#CrowEdit.ui.openFile.crow").DataSource = this; @@ -201,7 +201,7 @@ namespace CrowEdit } void textView_KeyDown (object sender, Crow.KeyboardKeyEventArgs e) - { + { if (e.Control) { if (e.Key == Key.W) { if (e.Shift) diff --git a/src/Parser.cs b/src/Parser.cs new file mode 100644 index 0000000..8c153e5 --- /dev/null +++ b/src/Parser.cs @@ -0,0 +1,158 @@ +using System; +using System.IO; +using Crow; +using System.Collections.Generic; + +namespace CrowEdit +{ + public abstract class Parser + { + public enum TokenType { + Unknown, + WhiteSpace, + NewLine, + LineComment, + BlockComment, + Type, + Identifier, + Indexer, + OpenBlock, + CloseBlock, + StatementEnding, + UnaryOp, + BinaryOp, + Affectation, + StringLitteralOpening, + StringLitteralClosing, + StringLitteral, + NumericLitteral, + Preprocessor, + } + + public class ParsingException : Exception + { + public ParsingException(Parser parser, string txt) + : base(string.Format("Parser exception ({0},{1}): {2}", parser.currentLine, parser.currentColumn, txt)) + { + } + public ParsingException(Parser parser, string txt, Exception innerException) + : base(txt, innerException) + { + txt = string.Format("Parser exception ({0},{1}): {2}", parser.currentLine, parser.currentColumn, txt); + } + } + + public Parser (CodeTextBuffer _buffer) + { + buffer = _buffer; + if (buffer.Count > 0) + eof = false; + } + + protected bool parsed = false; + protected int currentLine = 0; + protected int currentColumn = 0; + protected Token currentTok; + protected bool eof = true; + + CodeTextBuffer buffer; + + public List> Tokens; + + public bool Parsed { get { return parsed; }} + public Point CurrentPosition { get { return new Point (currentLine, currentColumn); } } + + public abstract void Parse(); + + protected void readToCurrTok(bool startOfTok = false){ + if (startOfTok) + currentTok.Start = CurrentPosition; + currentTok += Read(); + } + + protected void readAndResetCurrentTok(System.Enum type, bool startToc = false) { + readToCurrTok (); + saveAndResetCurrentTok (type); + } + protected void saveAndResetCurrentTok() { this.saveAndResetCurrentTok (currentTok.Type); } + protected void saveAndResetCurrentTok(System.Enum type) { + currentTok.Type = (TokenType)type; + currentTok.End = CurrentPosition; + Tokens.Add (currentTok); + + currentTok = default(Token); + } + + protected virtual char Peek() { + if (eof) + throw new ParsingException (this, "Unexpected End of File"); + return currentColumn < buffer [currentLine].Length ? + buffer [currentLine] [currentColumn] : '\n'; + } + protected virtual string Peek(int length) { + if (eof) + throw new ParsingException (this, "Unexpected End of File"); + if (buffer[currentLine].Length - currentColumn - length < 0) + throw new ParsingException (this, "Unexpected End of line"); + return buffer [currentLine].Substring (currentColumn, length); + } + protected virtual char Read() { + char c = Peek (); + + if (c == '\n') { + currentLine++; + if (currentLine >= buffer.Count) + eof = true; + currentColumn = 0; + } else + currentColumn++; + return c; + } + protected virtual string Read(uint length) { + string tmp = ""; + for (int i = 0; i < length; i++) { + char c = Peek (); + if (c == '\n') { + currentLine++; + if (currentLine >= buffer.Count) + eof = true; + currentColumn = 0; + } else + currentColumn++; + tmp += c; + } + return tmp; + } + + protected virtual string ReadUntil (string endExp){ + string tmp = ""; + + while (!eof) { + if (buffer [currentLine].Length - currentColumn - endExp.Length < 0) { + currentLine++; + if (currentLine >= buffer.Count) + eof = true; + currentColumn = 0; + continue; + } + if (string.Equals (Peek (endExp.Length), endExp)) + return tmp; + tmp += Read(); + } + throw new ParsingException (this, string.Format("Expectign '{0}'", endExp)); + } + + protected void SkipWhiteSpaces () { + if (currentTok.Type != TokenType.Unknown) + throw new ParsingException (this, "current token should be reset to unknown (0) before skiping white spaces"); + while (!eof) { + if (!char.IsWhiteSpace (Peek ())||Peek()=='\n') + break; + readToCurrTok (currentTok.Type == TokenType.Unknown); + currentTok.Type = TokenType.WhiteSpace; + } + if (currentTok.Type != TokenType.Unknown) + saveAndResetCurrentTok (); + } + } +} \ No newline at end of file diff --git a/src/SourceEditor.cs b/src/SourceEditor.cs index 8f1748e..e2c6f5b 100644 --- a/src/SourceEditor.cs +++ b/src/SourceEditor.cs @@ -58,11 +58,9 @@ namespace Crow } #region private and protected fields - string lineBreak = Interface.LineBreak; int visibleLines = 1; int visibleColumns = 1; CodeTextBuffer buffer; - string _text = "label"; Color selBackground; Color selForeground; int _currentCol; //0 based cursor position in string @@ -79,20 +77,14 @@ namespace Crow public string Text { get { - return buffer == null ? - _text : buffer.FullText; + return buffer == null ? "" : buffer.FullText; } set { - if (string.Equals (value, _text, StringComparison.Ordinal)) + if (string.Equals (value, buffer?.FullText, StringComparison.Ordinal)) return; - _text = value; - - if (string.IsNullOrEmpty(_text)) - _text = ""; - - buffer = new CodeTextBuffer (_text); + buffer = new CodeTextBuffer (value); MaxScrollY = Math.Max (0, buffer.Count - visibleLines); MaxScrollX = Math.Max (0, buffer.longestLineCharCount - visibleColumns); @@ -239,24 +231,19 @@ namespace Crow if (SelRelease < 0 || SelBegin < 0) return ""; if (selectionStart.Y == selectionEnd.Y) - return buffer [selectionStart.Y].RawText.Substring (selectionStart.X, selectionEnd.X - selectionStart.X); + return buffer [selectionStart.Y].Substring (selectionStart.X, selectionEnd.X - selectionStart.X); string tmp = ""; - tmp = buffer [selectionStart.Y].RawText.Substring (selectionStart.X); + tmp = buffer [selectionStart.Y].Substring (selectionStart.X); for (int l = selectionStart.Y + 1; l < selectionEnd.Y; l++) { tmp += Interface.LineBreak + buffer [l]; } - tmp += Interface.LineBreak + buffer [selectionEnd.Y].RawText.Substring (0, selectionEnd.X); + tmp += Interface.LineBreak + buffer [selectionEnd.Y].Substring (0, selectionEnd.X); return tmp; } } [XmlIgnore]public bool selectionIsEmpty { get { return SelRelease == SelBegin; } } - List getLines { - get { - return Regex.Split (_text, "\r\n|\r|\n|\\\\n").ToList(); - } - } /// /// Moves cursor one char to the left. /// @@ -318,27 +305,27 @@ namespace Crow return; CurrentLine--; CurrentColumn = buffer [CurrentLine].Length; - buffer [CurrentLine].RawText += buffer [CurrentLine + 1].RawText; - buffer.RemoveLine (CurrentLine + 1); + buffer [CurrentLine] += buffer [CurrentLine + 1]; + buffer.RemoveAt (CurrentLine + 1); OnTextChanged (this, null); return; } CurrentColumn--; - buffer [CurrentLine].RawText = buffer [CurrentLine].RawText.Remove (CurrentColumn, 1); + buffer [CurrentLine] = buffer [CurrentLine].Remove (CurrentColumn, 1); } else { int linesToRemove = selectionEnd.Y - selectionStart.Y + 1; int l = selectionStart.Y; if (linesToRemove > 0) { - buffer [l].RawText = buffer [l].RawText.Remove (selectionStart.X, buffer [l].Length - selectionStart.X) + - buffer [selectionEnd.Y].RawText.Substring (selectionEnd.X, buffer [selectionEnd.Y].Length - selectionEnd.X); + buffer [l] = buffer [l].Remove (selectionStart.X, buffer [l].Length - selectionStart.X) + + buffer [selectionEnd.Y].Substring (selectionEnd.X, buffer [selectionEnd.Y].Length - selectionEnd.X); l++; for (int c = 0; c < linesToRemove-1; c++) - buffer.RemoveLine (l); + buffer.RemoveAt (l); CurrentLine = selectionStart.Y; CurrentColumn = selectionStart.X; } else - buffer [l].RawText = buffer [l].RawText.Remove (selectionStart.X, selectionEnd.X - selectionStart.X); + buffer [l] = buffer [l].Remove (selectionStart.X, selectionEnd.X - selectionStart.X); CurrentColumn = selectionStart.X; SelBegin = -1; SelRelease = -1; @@ -369,15 +356,7 @@ namespace Crow if (lt == LayoutingType.Height) return (int)Math.Ceiling(fe.Height * buffer.Count) + Margin * 2; - string txt = _text.Replace("\t", new String (' ', Interface.TabSize)); - - - int maxChar = 0; - foreach (string s in Regex.Split (txt, "\r\n|\r|\n|\\\\n")) { - if (maxChar < s.Length) - maxChar = s.Length; - } - return (int)(fe.MaxXAdvance * maxChar) + Margin * 2; + return (int)(fe.MaxXAdvance * buffer.longestLineCharCount) + Margin * 2; } public override void OnLayoutChanges (LayoutingType layoutType) { @@ -421,7 +400,7 @@ namespace Crow int curL = i + ScrollY; if (curL >= buffer.Count) break; - string lstr = buffer[curL].RawText; + string lstr = buffer[curL]; if (ScrollX < lstr.Length) lstr = lstr.Substring (ScrollX); else @@ -687,7 +666,7 @@ namespace Crow SelRelease = CurrentPosition; break; } - SelRelease = -1; + SelRelease = -1; CurrentLine += visibleLines; break; case Key.PageUp: @@ -697,7 +676,7 @@ namespace Crow CurrentLine -= visibleLines; SelRelease = CurrentPosition; break; - } + } CurrentLine -= visibleLines; break; case Key.RWin: @@ -705,6 +684,14 @@ namespace Crow case Key.Tab: this.Insert ("\t"); break; + case Key.F8: + try { + CrowEdit.XMLParser parser = new CrowEdit.XMLParser (buffer); + parser.Parse (); + }catch(Exception ee){ + Debug.WriteLine (ee.ToString ()); + } + break; default: break; } @@ -728,7 +715,7 @@ namespace Crow double GetXFromTextPointer(Context gr, Point pos) { try { - string l = buffer [pos.Y].RawText.Substring (0, pos.X). + string l = buffer [pos.Y].Substring (0, pos.X). Replace ("\t", new String (' ', Interface.TabSize)); return gr.TextExtents (l).XAdvance; } catch{ @@ -736,31 +723,6 @@ namespace Crow } } - /// line break could be '\r' or '\n' or '\r\n' - string detectLineBreakKind(){ - string strLB = ""; - - if (string.IsNullOrEmpty(_text)) - return Interface.LineBreak; - int i = 0; - while ( i < _text.Length) { - if (_text [i] == '\r') { - strLB += '\r'; - i++; - } - if (i < _text.Length) { - if (_text [i] == '\r') - return "\r"; - if (_text [i] == '\n') - strLB += '\n'; - } - if (!string.IsNullOrEmpty (strLB)) - return strLB; - i++; - } - return Interface.LineBreak; - } - void updateVisibleLines(){ visibleLines = (int)Math.Floor ((double)ClientRectangle.Height / fe.Height); MaxScrollY = Math.Max (0, buffer.Count - visibleLines); @@ -787,11 +749,11 @@ namespace Crow if (!selectionIsEmpty) this.DeleteChar (); string[] strLines = Regex.Split (str, "\r\n|\r|\n|" + @"\\n").ToArray(); - buffer [CurrentLine].RawText = buffer [CurrentLine].RawText.Insert (CurrentColumn, strLines[0]); + buffer [CurrentLine] = buffer [CurrentLine].Insert (CurrentColumn, strLines[0]); CurrentColumn += strLines[0].Length; for (int i = 1; i < strLines.Length; i++) { InsertLineBreak (); - buffer [CurrentLine].RawText = buffer [CurrentLine].RawText.Insert (CurrentColumn, strLines[i]); + buffer [CurrentLine] = buffer [CurrentLine].Insert (CurrentColumn, strLines[i]); CurrentColumn += strLines[i].Length; } OnTextChanged (this, null); @@ -803,8 +765,8 @@ namespace Crow /// protected void InsertLineBreak() { - buffer.InsertLine(CurrentLine + 1, new SourceLine (buffer[CurrentLine].RawText.Substring(CurrentColumn))); - buffer [CurrentLine].RawText = buffer [CurrentLine].RawText.Substring (0, CurrentColumn); + buffer.Insert(CurrentLine + 1, buffer[CurrentLine].Substring(CurrentColumn)); + buffer [CurrentLine] = buffer [CurrentLine].Substring (0, CurrentColumn); CurrentLine++; CurrentColumn = 0; OnTextChanged (this, null); diff --git a/src/SourceLine.cs b/src/SourceLine.cs deleted file mode 100644 index cb777c6..0000000 --- a/src/SourceLine.cs +++ /dev/null @@ -1,156 +0,0 @@ -// -// SourceLine.cs -// -// Author: -// Jean-Philippe Bruyère -// -// Copyright (c) 2017 jp -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . -using System; -using System.Collections.Generic; - -namespace Crow -{ - /// - /// basic structure for line of source code - /// - public class SourceLine - { - public string RawText; - public List Tokens = null; - - public int Length { - get { return string.IsNullOrEmpty (RawText)? 0 : RawText.Length; } - } - public char this[int index]{ - get { return RawText [index]; } - - } - int ptr; //character pointer in the source string - Token tok; //current token parsed before addition to the token list - - public SourceLine () - { - } - public SourceLine (string rawText){ - RawText = rawText; - } - - /// - /// Tokenize this instance. - /// This tokenization step is used for display mainly, so literals are not interpreted - /// - public bool Tokenize(){ - Tokens = new List(); - ptr = 0; - - while (!eol) { - Char c = readChar (); - - //block comments - if (tok?.Type == TokenType.BlockComment) { - tok.Content += c; - if (c == '*') { - if (peekChar () == '/') { - tok.Content += readChar (); - saveCurTok (); - } - } - continue; - } else if (tok?.Type == TokenType.StringLiteral) { - tok.Content += c; - if (c == '\\')//may escape " char, so next char is read; - tok.Content += readChar (); - else if (c == '"') - saveCurTok (); - continue; - } else if (tok?.Type == TokenType.CharacterLiteral) { - tok.Content += c; - if (c == '\\')//may escape ' char, so next char is read; - tok.Content += readChar (); - else if (c == '\'') - saveCurTok (); - continue; - } else if (tok?.Type == TokenType.WhiteSpace) { - if (char.IsWhiteSpace (c)) { - tok.Content += c; - continue; - } - saveCurTok (); - //if (char.IsLetter (c)) - - } - - //single char tokens - if (c == '{') - tok.Type = TokenType.OpenBlock; - else if (c == '}') - tok.Type = TokenType.CloseBlock; - else if (c == '(') - tok.Type = TokenType.OpenParenth; - else if (c == ')') - tok.Type = TokenType.CloseParenth; - - - if (tok == null) { - tok = new Token () { Content = new string (c, 1) }; - - if (char.IsWhiteSpace (c)) - tok.Type = TokenType.WhiteSpace; - else if (char.IsDigit (c)) - tok.Type = TokenType.DigitalLiteral; - else if (char.IsLetter (c)) - tok.Type = TokenType.Unknown; - else if (c == '"') - tok.Type = TokenType.StringLiteral; - else {//put here all single step parsing token, reseting tok directely - saveCurTok (); - } - } - - - - ptr++; - } - return true; - } - /// add tok to token list and reset it to null - void saveCurTok(){ - Tokens.Add (tok); - tok = null; - } - public void PresetCurrentToken (TokenType tokType, string content = null){ - tok = new Token (tokType,content); - } - - bool eol { get { return ptr < RawText.Length; }} - char readChar() { - char c = RawText [ptr]; - ptr++; - return c; - } - char peekChar() { - return RawText [ptr]; - } - -// public static implicit operator SourceLine(string rawText){ -// return new SourceLine() { RawText = rawText }; -// } -// public static implicit operator string(SourceLine sl){ -// return sl?.RawText; -// } - } -} - diff --git a/src/Token.cs b/src/Token.cs index 7639ca2..e306c76 100644 --- a/src/Token.cs +++ b/src/Token.cs @@ -19,41 +19,48 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . using System; +using CrowEdit; namespace Crow { - public enum TokenType { - Unknown, - WhiteSpace, - OpenParenth, - CloseParenth, - OpenBlock, - CloseBlock, - StatementEnding, - UnaryOp, - BinaryOp, - Affectation, - StringLiteral, - CharacterLiteral, - DigitalLiteral, - Literal, - Identifier, - Indexer, - Type, - LineComment, - BlockComment, - } - public class Token + public struct Token { - public TokenType Type; + public Parser.TokenType Type; public string Content; + public Point Start; + public Point End; - public Token () - { +// public Token (TokenType tokType, string content = ""){ +// Type = tokType; +// Content = content; +// } + + public bool IsEmpty { get { return string.IsNullOrEmpty(Content); }} + + public static bool operator == (Token t, System.Enum tt){ + return Convert.ToInt32(t.Type) == Convert.ToInt32(tt); + } + public static bool operator != (Token t, System.Enum tt){ + return Convert.ToInt32(t.Type) != Convert.ToInt32(tt); + } + public static bool operator == (System.Enum tt, Token t){ + return Convert.ToInt32(t.Type) == Convert.ToInt32(tt); } - public Token (TokenType tokType, string content = null){ - Type = tokType; - Content = content; + public static bool operator != (System.Enum tt, Token t){ + return Convert.ToInt32(t.Type) != Convert.ToInt32(tt); + } + + public static Token operator +(Token t, char c){ + t.Content += c; + return t; + } + public static Token operator +(Token t, string s){ + t.Content += s; + return t; + } + public override string ToString () + { + return string.Format ("[Tok{2}->{3}:{0}: {1}]", Type,Content,Start,End); } } } diff --git a/src/XMLParser.cs b/src/XMLParser.cs new file mode 100644 index 0000000..8129fac --- /dev/null +++ b/src/XMLParser.cs @@ -0,0 +1,231 @@ +using System; +using Crow; +using System.Collections.Generic; +using System.Text.RegularExpressions; + +namespace CrowEdit +{ + public class XMLParser : Parser + { + + public XMLParser (CodeTextBuffer _buffer) : base(_buffer) + { + } + + public new enum TokenType { + Unknown = Parser.TokenType.Unknown, + WhiteSpace = Parser.TokenType.WhiteSpace, + NewLine = Parser.TokenType.NewLine, + LineComment = Parser.TokenType.LineComment, + BlockComment = Parser.TokenType.BlockComment, + Affectation = Parser.TokenType.Affectation, + XMLDecl = Parser.TokenType.Preprocessor, + ElementStart, + ElementEnd, + ElementClosing = Parser.TokenType.StatementEnding, + ElementName = Parser.TokenType.Type, + AttributeName = Parser.TokenType.Identifier, + AttributeValueOpening = Parser.TokenType.StringLitteralOpening, + AttributeValueClosing = Parser.TokenType.StringLitteralClosing, + AttributeValue = Parser.TokenType.StringLitteral, + } + public enum States + { + init, //first statement of prolog, xmldecl should only apear in this state + prolog, //misc before doctypedecl + InternalSubset, //doctype declaration subset + ExternalSubsetInit, + ExternalSubset, + DTDEnd,//doctype finished + XML, + StartTag, + Content, + EndTag, + XMLEnd + } + enum Keywords + { + DOCTYPE, + ELEMENT, + ATTLIST, + ENTITY, + NOTATION + } + + States curState = States.init; + + #region Regular Expression for validity checks + //private static Regex rxValidChar = new Regex("[\u0020-\uD7FF]"); + private static Regex rxValidChar = new Regex(@"\u0009|\u000A|\u000D|[\u0020-\uD7FF]|[\uE000-\uFFFD]"); //| [\u10000-\u10FFFF] unable to set those plans + private static Regex rxNameStartChar = new Regex(@":|[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]"); // | [\u10000-\uEFFFF] + private static Regex rxNameChar = new Regex(@":|[A-Z]|_|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|-|\.|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]");//[\u10000-\uEFFFF]| + private static Regex rxDecimal = new Regex(@"[0-9]+"); + private static Regex rxHexadecimal = new Regex(@"[0-9a-fA-F]+"); + private static Regex rxAttributeValue = new Regex(@"[^<]"); + private static Regex rxEntityValue = new Regex(@"[^<]"); + private static Regex rxPubidChar = new Regex(@"\u0020|\u000D|\u000A|[a-zA-Z0-9]|[-\(\)\+\,\./:=\?;!\*#@\$_%]"); + #endregion + + #region Character ValidityCheck + public bool nextCharIsValidCharStartName + { + get { return rxNameStartChar.IsMatch(new string(new char[]{Peek()})); } + } + public bool nextCharIsValidCharName + { + get { return rxNameChar.IsMatch(new string(new char[]{Peek()})); } + } +// public bool NameIsValid(string name) +// { +// if (!rxNameStartChar.IsMatch(char.ConvertFromUtf32(((string)name)[0]))) +// return false; +// +// return rxNameChar.IsMatch(name); +// } +// private bool NextCharIsValidPubidChar +// { +// get { return rxPubidChar.IsMatch(char.ConvertFromUtf32(Peek())); } +// } +// private bool AttributeValueIsValid(string name) +// { +// return string.IsNullOrEmpty(name) ? true : rxAttributeValue.IsMatch(name); +// } +// private bool NextCharIsValidEntityValue +// { +// get { return rxEntityValue.IsMatch(char.ConvertFromUtf32(Peek())); } +// } + #endregion + + public override void Parse () + { + parsed = false; + Tokens = new List (); + currentLine = currentColumn = 0; + currentTok = default(Token); + curState = States.init; + + string tmp = ""; + + while (!eof) { + SkipWhiteSpaces (); + + if (eof) + break; + + switch (Peek()) { + case '\n': + if (currentTok != TokenType.Unknown) + throw new ParsingException (this, "Unexpected end of line"); + readAndResetCurrentTok (TokenType.NewLine, true); + break; + case '<': + readToCurrTok (true); + switch (Peek()) { + case '?': + if (curState != States.init) + throw new ParsingException (this, "prolog may appear only on first line"); + readToCurrTok (); + currentTok += ReadUntil ("?>"); + saveAndResetCurrentTok (TokenType.XMLDecl); + curState = States.prolog; + break; + case '!': + readToCurrTok (); + switch (Peek()) { + case '-': + readToCurrTok (); + if (Peek () != '-') + throw new ParsingException (this, "Expecting comment start tag"); + currentTok += ReadUntil ("--"); + if (Peek () != '>') + throw new ParsingException (this, "Expecting comment closing tag"); + readAndResetCurrentTok (TokenType.BlockComment); + break; + default: + throw new NotImplementedException (); + } + break; + default: + if (!(curState == States.Content || curState == States.XML || curState == States.init)) + throw new ParsingException (this, "Unexpected char: '<'"); + if (Peek () == '/') { + curState = States.EndTag; + readToCurrTok (); + saveAndResetCurrentTok (TokenType.ElementEnd); + } else { + curState = States.StartTag; + saveAndResetCurrentTok (TokenType.ElementStart); + } + + if (!nextCharIsValidCharStartName) + throw new ParsingException (this, "Expected element name"); + + readToCurrTok (true); + while (nextCharIsValidCharName) + readToCurrTok (); + + saveAndResetCurrentTok (TokenType.ElementName); + break; + } + break; + case '/': + if (curState != States.StartTag) + throw new ParsingException (this, "Unexpected char: '/'"); + readToCurrTok (true); + if (Peek () != '>') + throw new ParsingException (this, "Expecting '>'"); + readAndResetCurrentTok (TokenType.ElementClosing); + + curState = States.XML; + break; + case '>': + readAndResetCurrentTok (TokenType.ElementClosing, true); + switch (curState) { + case States.EndTag: + curState = States.XML; + break; + case States.StartTag: + curState = States.Content; + break; + default: + throw new ParsingException (this, "Unexpected char: '>'"); + } + break; + default: + switch (curState) { + case States.StartTag: + if (!nextCharIsValidCharStartName) + throw new ParsingException (this, "Expected attribute name"); + readToCurrTok (true); + while (nextCharIsValidCharName) + readToCurrTok (); + saveAndResetCurrentTok (TokenType.AttributeName); + if (Peek () != '=') + throw new ParsingException (this, "Expecting: '='"); + readAndResetCurrentTok (TokenType.Affectation, true); + + char openAttVal = Peek (); + if (openAttVal != '"' && openAttVal != '\'') + throw new ParsingException (this, "Expecting attribute value enclosed either in '\"' or in \"'\""); + readAndResetCurrentTok (TokenType.AttributeValueOpening, true); + + currentTok.Start = CurrentPosition; + currentTok.Content = ReadUntil (new string (new char[]{ openAttVal })); + saveAndResetCurrentTok (TokenType.AttributeValue); + + if (Peek () != openAttVal) + throw new ParsingException (this, string.Format ("Expecting {0}", openAttVal)); + readAndResetCurrentTok (TokenType.AttributeValueClosing, true); + break; + default: + throw new ParsingException (this, "unexpected char: " + Peek ()); + } + break; + } + } + + parsed = true; + } + } +} + diff --git a/ui/main.crow b/ui/main.crow index 981b5ca..8e81eb2 100755 --- a/ui/main.crow +++ b/ui/main.crow @@ -22,15 +22,15 @@ - - - -- 2.47.3