/// AST node types and a recursive-descent parser for Diet templates.
///
/// The parser consumes the token stream produced by `dietc.lexer` and builds a
/// tree rooted at a `Document`. Every node exposes the source `Token` it spans
/// and can be walked through `ASTVisitor` or `traverse`.
module dietc.parser;

import dietc.lexer;

import std.algorithm;
import std.array : join;
import std.conv : text, to;
import std.meta : AliasSeq;

/// All concrete AST classes known to `ASTVisitor`.
///
/// Order matters for the dynamic dispatch in `ASTVisitor.visit(AST)`: derived
/// classes are listed before their bases (`HiddenComment` before `Comment`,
/// `RawAssignment` before `Assignment` before `StringTagContents`) so the
/// first successful cast is the most specific one.
alias ASTClasses = AliasSeq!(Document, HiddenComment, Comment, DStatement,
        DietFilter, TagNode, TagNode.AttributeAST, RawAssignment,
        Assignment, StringTagContents, TextLine, XMLNode, PipeText, Expression, TextLine.PartAST);

/// Common interface of everything that can appear in the tree.
interface AST
{
    /// Returns: the token (type, content and source range) this node covers.
    Token token() @property;
    /// Passes all direct sub-nodes of this node to `visitor.visit`.
    void accept(ASTVisitor visitor);
}

/// Base visitor. The default `visit` overloads simply descend into the node.
abstract class ASTVisitor
{
    // One overload per concrete AST class; the default implementation recurses.
    static foreach (T; ASTClasses)
        void visit(T ast)
        in(ast !is null)
        {
            ast.accept(this);
        }

    /// Dispatches a node of unknown static type to the matching typed overload.
    /// Throws: Exception if `ast` is not an instance of any class in `ASTClasses`.
    void visit(AST ast)
    in(ast !is null)
    {
        static foreach (T; ASTClasses)
            if (cast(T) ast)
                return visit(cast(T) ast);
        throw new Exception("Unknown ast passed?!");
    }
}

/// Decision returned by a `VisitorDelegate` for each visited node.
enum VisitResult
{
    /// Do not descend into this node; go on with the next one.
    continue_,
    /// Descend into the children of this node.
    recurse,
    /// Abort the whole traversal.
    return_
}

/// Callback type for `traverse`: receives the visited node and its parent.
alias VisitorDelegate = VisitResult delegate(AST node, AST parent);

/// Walks the sub-nodes of `node`, calling `callback` for each of them.
///
/// `node` itself is not passed to the callback; it is the `parent` argument of
/// its direct sub-nodes. Once the callback returns `VisitResult.return_` no
/// further callbacks are made.
void traverse(AST node, VisitorDelegate callback)
{
    static class VisitorImpl : ASTVisitor
    {
        static foreach (T; ASTClasses)
            override void visit(T ast)
            {
                // a previous callback asked to abort: unwind without calling again
                if (result == VisitResult.return_)
                    return;

                result = callback(ast, parents[$ - 1]);
                if (result == VisitResult.return_)
                    return;

                if (result == VisitResult.recurse)
                {
                    parents ~= ast;
                    scope (exit)
                        parents.length--;
                    ast.accept(this);
                }
            }

        // keep the untyped visit(AST) overload visible next to the overrides
        alias visit = ASTVisitor.visit;

        VisitResult result;
        AST[] parents;
        VisitorDelegate callback;

        this(AST node, VisitorDelegate callback)
        {
            parents = [node];
            this.callback = callback;
        }
    }

    node.accept(new VisitorImpl(node, callback));
}

/// Root of a parsed template; holds the top-level nodes.
class Document : AST, Node
{
    Node[] _children;
    Token _token;

    Node[] children() @property
    {
        return _children;
    }

    /// Appends `child` and extends this node's range to the end of `child`.
    void addChild(Node child)
    {
        _token.range[1] = child.token.range[1];
        _children ~= child;
    }

    Token token() @property
    {
        return _token;
    }

    void accept(ASTVisitor visitor)
    {
        foreach (child; _children)
            if (child)
                visitor.visit(child);
    }

    this(Token token)
    {
        _token = token;
    }

    override string toString() const
    {
        string ret = "Document(";
        foreach (child; _children)
            ret ~= "\n" ~ child.to!string.indent;
        return ret ~= ")";
    }
}

/// An AST node that can own nested child nodes.
interface Node : AST
{
    /// Returns: the nested child nodes.
    Node[] children() @property;
    /// Appends a nested child node.
    void addChild(Node);
}

/// Implemented by nodes that carry a name (tags, filters, attributes).
interface INamed
{
    string name() @property;
}

/// Implemented by nodes whose payload is a plain string.
interface IStringContainer : AST
{
    string content() @property;
}

/// Shared implementation for nodes consisting of a token, a text payload and
/// optional children.
abstract class StringNode : Node, IStringContainer
{
    Token _token;
    string _content;
    Node[] _children;

    Node[] children() @property
    {
        return _children;
    }

    /// Appends `child` and extends this node's range to the end of `child`.
    void addChild(Node child)
    {
        _token.range[1] = child.token.range[1];
        _children ~= child;
    }

    Token token() @property
    {
        return _token;
    }

    void accept(ASTVisitor visitor)
    {
        foreach (child; _children)
            if (child)
                visitor.visit(child);
    }

    string content() @property
    {
        return _content;
    }

    this(Token token, string content)
    {
        _token = token;
        _content = content;
    }

    override string toString()
    {
        string ret = text('(', (cast(Object) this).classinfo.name, `) "`, content, '"');
        foreach (child; children)
            ret ~= "\n" ~ child.to!string.indent;
        return ret;
    }
}

/// A `//` comment.
class Comment : StringNode
{
    this(Token token, string content)
    {
        super(token, content);
    }
}

/// A `//-` comment.
class HiddenComment : Comment
{
    this(Token token, string content)
    {
        super(token, content);
    }
}

/// A line starting with `-`; `content` is the text after the dash.
class DStatement : StringNode
{
    this(Token token, string content)
    {
        super(token, content);
    }
}

/// A `:name` filter with its text body.
class DietFilter : StringNode, INamed
{
    string _name;

    string name() @property
    {
        return _name;
    }

    this(Token token, string name, string content)
    {
        super(token, content);
        _name = name;
    }
}

/// Nodes that may appear both as a standalone node and as the content of a tag.
interface NestedTags : Node, TagContents
{
}

/// A tag with its ids, classes, attributes, modifiers, inline content and children.
class TagNode : NestedTags, INamed
{
    // NormalTagStart (TextBlock | NestedStart NestedTags | TagContents)
    // NormalTagStart: TAG_IDENTIFIER? ( NodeID | NodeClass )* Attributes? ( FitInside? FitOutside? | FitOutside FitInside ) Translated?
    // TagContents: RawAssignment | Assignment | ' '? TextLine | EOL

    /// A single `name` or `name=expr` attribute; `expr` is null when no value was given.
    struct Attribute
    {
        Token name;
        Expression expr;

        string toString()
        {
            return name.content ~ name.range.to!string ~ (expr ? "='" ~ expr.content ~ "'" : "×");
        }
    }

    /// Wrapper class around Attribute used for visiting
    class AttributeAST : AST, INamed
    {
        Attribute attribute;

        this(Attribute attribute)
        {
            this.attribute = attribute;
        }

        /// Returns: the name token, extended to the end of the value expression if there is one.
        Token token() @property
        {
            auto tok = attribute.name;
            if (attribute.expr)
                tok.range[1] = attribute.expr.token.range[1];
            return tok;
        }

        string name() @property
        {
            return attribute.name.content;
        }

        void accept(ASTVisitor visitor)
        {
            if (attribute.expr)
                visitor.visit(attribute.expr);
        }

        override string toString()
        {
            return attribute.toString();
        }
    }

    Token[] directIDs, directClasses;
    Attribute[] attributes;
    Token _fitInside, _fitOutside, _translated;
    Token _token, _tag;
    TagContents _contents;
    Node[] _children;
    size_t[2] attributesRange;

    Node[] children() @property
    {
        return _children;
    }

    /// Visits the attributes (each wrapped in a fresh `AttributeAST`), then the
    /// inline contents, then the children.
    void accept(ASTVisitor visitor)
    {
        foreach (attr; attributes)
            visitor.visit(new AttributeAST(attr));
        if (_contents !is null)
            visitor.visit(_contents);
        foreach (child; _children)
            if (child)
                visitor.visit(child);
    }

    /// Appends `child` and extends this node's range to the end of `child`.
    void addChild(Node child)
    {
        _token.range[1] = child.token.range[1];
        _children ~= child;
    }

    /// Returns: the token of the tag name only.
    Token tag() @property
    {
        return _tag;
    }

    /// Returns: the token covering the whole tag node.
    Token token() @property
    {
        return _token;
    }

    /// Returns: the tag name.
    string name() @property
    {
        return _tag.content;
    }

    /// Returns: true if the `<` whitespace modifier token is present
    bool fitInside() @property
    {
        return _fitInside != Token.init;
    }

    /// Returns: true if the `>` whitespace modifier token is present
    bool fitOutside() @property
    {
        return _fitOutside != Token.init;
    }

    /// Returns: true if the `&` token is present
    bool translated() @property
    {
        return _translated != Token.init;
    }

    /// Content of the tag (nullable)
    TagContents contents() @property
    {
        return _contents;
    }

    this(Token tag)
    {
        this(tag, null);
    }

    this(Token tag, TagContents contents)
    {
        _tag = _token = tag;
        _contents = contents;
    }

    override string toString()
    {
        import std.array : join;

        string ret = "tag" ~ token.range.to!string ~ "<" ~ _tag.range.to!string ~ name ~ ">";
        foreach (c; directClasses)
            ret ~= c.content;
        foreach (id; directIDs)
            ret ~= id.content;
        ret ~= "(";
        ret ~= attributes.to!(string[]).join(", ");
        ret ~= ")" ~ attributesRange.to!string;
        if (fitOutside)
            ret ~= ">";
        if (fitInside)
            ret ~= "<";
        if (translated)
            ret ~= "&";
        if (_contents !is null)
            ret ~= " = " ~ contents.to!string;
        foreach (child; children)
            ret ~= "\n" ~ child.to!string.indent;
        return ret;
    }
}

/// Marker interface for everything that can be the inline content of a tag.
interface TagContents : AST
{
}

/// Tag content that is a plain string (text blocks and assignments).
class StringTagContents : TagContents, IStringContainer
{
    Token _token;
    string _content;

    Token token() @property
    {
        return _token;
    }

    /// Leaf node: nothing to visit.
    void accept(ASTVisitor)
    {
    }

    string content() @property
    {
        return _content;
    }

    this(Token token, string content)
    {
        _token = token;
        _content = content;
    }

    override string toString()
    {
        string ret = text('(', (cast(Object) this).classinfo.name, `) "`, content, '"');
        return ret;
    }
}

/// `= expr` tag content.
class Assignment : StringTagContents
{
    this(Token token, string content)
    {
        super(token, content);
    }
}

/// `!= expr` tag content.
class RawAssignment : Assignment
{
    this(Token token, string content)
    {
        super(token, content);
    }
}

/// One line of text made of raw runs, `#{}` / `!{}` interpolations and `#[]` inline tags.
class TextLine : TagContents
{
    /// One segment of a text line. Exactly one of `raw`, `inlineExpr` and
    /// `inlineTag` is expected to be set by the parser.
    struct Part
    {
        Token token;
        string raw;
        Expression inlineExpr;
        NestedTags inlineTag;
        /// true for `#{...}`, false for `!{...}` (see `toString`).
        bool escapeInlineExpr;

        string toString() const
        {
            string pre = token.range.to!string;
            if (raw.length)
                return pre ~ raw;
            else if (inlineExpr !is null)
                return pre ~ (escapeInlineExpr ? "#" : "!") ~ "{" ~ inlineExpr.to!string ~ "}";
            else if (inlineTag !is null)
                return pre ~ "#[" ~ inlineTag.to!string ~ "]";
            else
                return pre;
        }
    }

    /// Wrapper class around Part used for visiting
    class PartAST : AST
    {
        Part part;

        this(Part part)
        {
            this.part = part;
        }

        Token token() @property
        {
            return part.token;
        }

        void accept(ASTVisitor visitor)
        {
            if (part.inlineExpr)
                visitor.visit(part.inlineExpr);
            if (part.inlineTag)
                visitor.visit(part.inlineTag);
        }

        override string toString() const
        {
            return part.toString();
        }
    }

    Token _token;
    Part[] _parts;

    /// Visits every part, each wrapped in a fresh `PartAST`.
    void accept(ASTVisitor visitor)
    {
        foreach (part; _parts)
            visitor.visit(new PartAST(part));
    }

    Token token() @property
    {
        return _token;
    }

    this(Token token, Part[] parts)
    {
        _token = token;
        _parts = parts;
    }

    override string toString()
    {
        string ret = _token.range.to!string ~ "'";
        foreach (part; _parts)
            ret ~= '{' ~ part.toString ~ '}';
        return ret ~ "'";
    }
}

/// A line starting with `<`, kept as a text line, plus nested children.
class XMLNode : NestedTags
{
    TextLine _line;
    Node[] _children;

    Node[] children() @property
    {
        return _children;
    }

    void accept(ASTVisitor visitor)
    {
        foreach (child; _children)
            if (child)
                visitor.visit(child);
    }

    void addChild(Node child)
    {
        _line.token.range[1] = child.token.range[1];
        _children ~= child;
    }

    Token token() @property
    {
        return _line.token;
    }

    this(TextLine line)
    {
        _line = line;
    }
}

/// A `|` text node, optionally marked with `&`.
class PipeText : NestedTags
{
    Token _token;
    Token _translated;
    TagContents _content;
    Node[] _children;

    Node[] children() @property
    {
        return _children;
    }

    void accept(ASTVisitor visitor)
    {
        if (_content)
            visitor.visit(_content);
        foreach (child; _children)
            if (child)
                visitor.visit(child);
    }

    /// Appends `child` and extends this node's range to the end of `child`.
    void addChild(Node child)
    {
        _token.range[1] = child.token.range[1];
        _children ~= child;
    }

    Token token() @property
    {
        return _token;
    }

    /// Returns: true if the `&` token is present
    bool translated() @property
    {
        return _translated != Token.init;
    }

    /// Content after the pipe (nullable)
    TagContents content() @property
    {
        return _content;
    }

    this(Token token, Token translated, TagContents content)
    {
        _token = token;
        _translated = translated;
        _content = content;
    }
}

/// A code expression (attribute value or interpolation body).
class Expression : AST
{
    Token _token;
    string _content;

    Token token() @property
    {
        return _token;
    }

    /// Leaf node: nothing to visit.
    void accept(ASTVisitor)
    {
    }

    string content() @property
    {
        return _content;
    }

    this(Token token, string content)
    {
        _token = token;
        _content = content;
    }

    override string toString()
    {
        return _content;
    }
}

/// Recursive-descent parser turning a `DietInput` token stream into a `Document`.
struct ASTParser
{
    /// Token stream being consumed; also collects the parse errors.
    DietInput input;
    /// Result of `parseDocument`.
    Document root;

    /// Parses top-level nodes until none matches, then expects end of file.
    void parseDocument()
    {
        root = new Document(input.front);
        while (true)
        {
            auto n = parseNode();
            if (!n)
                break;
            root.addChild(n);
        }
        input.expect(TokenType.eof);
    }

    /// Parses one node including its indented children.
    /// Returns: the node, or null (with `input` restored) if nothing matched or
    /// the node was not followed by a newline or end of file.
    Node parseNode()
    {
        auto past = input.save();
        auto val = parseNodeValue();
        if (val is null)
        {
            input = past;
            return null;
        }
        if (input.skipAll(TokenType.newline) == 0 && input.front.type != TokenType.eof)
        {
            input = past;
            return null;
        }
        if (input.peek(TokenType.indent))
        {
            input.popFront();
            while (true)
            {
                auto n = parseNode();
                if (!n)
                    break;
                val.addChild(n);
            }
            input.expect(TokenType.detent);
        }
        return val;
    }

    /// Tries comment, D statement, filter and nested tags, in that order.
    /// Returns: the first match or null.
    Node parseNodeValue()
    {
        if (auto comment = parseComment())
            return comment;
        if (auto statement = parseDStatement())
            return statement;
        if (auto filter = parseFilter())
            return filter;
        if (auto nested = parseNestedTags())
            return nested;
        return null;
    }

    /// Concatenates token contents into a string.
    /// Params:
    ///   multiline = if false, stop (without consuming) at the first newline;
    ///     if true, continue past a newline only when an indent token follows.
    ///   allowIndent = if false, report an error for indentation inside an
    ///     already indented block.
    /// Returns: the collected text; newline, indent and detent tokens are not
    ///   appended.
    string parseText(bool multiline, bool allowIndent)
    {
        string ret;
        size_t indentation = 1;
        bool lastNewline;
        while (true)
        {
            auto v = input.front;
            if (lastNewline && v.type != TokenType.indent)
                break;
            if (v.type == TokenType.eof)
                break;
            if (v.type == TokenType.newline && (indentation == 1 || !multiline))
            {
                if (multiline)
                {
                    lastNewline = true;
                    input.popFront;
                }
                else
                    break;
            }
            else
            {
                input.popFront;
                if (v.type == TokenType.indent)
                {
                    if (indentation > 1 && !allowIndent)
                        input.errors.error(input, v.range[0],
                                "Can't indent here because it is already indented.");
                    indentation++;
                }
                else if (v.type == TokenType.detent)
                {
                    indentation--;
                    if (indentation == 0)
                        break;
                }
                else
                {
                    ret ~= v.content;
                }
            }
        }
        return ret;
    }

    /// Parses a `//` or `//-` comment.
    /// Returns: a `HiddenComment` for `//-`, a `Comment` for `//`, else null.
    Comment parseComment()
    {
        auto tok = input.front;
        if (input.matchText("//"))
        {
            bool hidden = input.front.content.startsWith("-");
            string content = parseText(true, true);
            if (hidden)
                content = content[1 .. $]; // drop the leading '-'
            tok.type = TokenType.raw;
            tok.content = hidden ? "//-" : "//";
            tok.range[1] = input.front.range[0];
            return hidden ? new HiddenComment(tok, content) : new Comment(tok, content);
        }
        return null;
    }

    /// Parses a line whose first token starts with `-`.
    /// Returns: the statement (content without the leading dash) or null.
    DStatement parseDStatement()
    {
        auto tok = input.front;
        if (tok.content.startsWith("-"))
        {
            string content = parseText(false, false);
            content = content[1 .. $]; // drop the leading '-'
            tok.content = "-";
            tok.type = TokenType.raw;
            tok.range[1] = input.front.range[0];
            return new DStatement(tok, content);
        }
        return null;
    }

    /// Parses a `:name text` filter.
    /// Returns: the filter or null if the input does not start with `:`.
    DietFilter parseFilter()
    {
        auto startTok = input.front;
        if (input.matchText(":"))
        {
            auto tok = input.front;
            string name;
            if (input.expect(TokenType.identifier))
            {
                name = tok.content;
                if (!name.validateIdentifierAlpha)
                    input.errors.expect(input, tok.range[0], "identifier of type [-_a-zA-Z][-_0-9a-zA-Z]*");
            }
            input.match(TokenType.whitespace);
            string text = parseText(true, true);
            return new DietFilter(startTok, name, text);
        }
        return null;
    }

    /// Tries doctype, XML line, pipe text and tag (nested `:` content allowed).
    NestedTags parseNestedTags()
    {
        if (auto comment = parseDoctype())
            return comment;
        if (auto statement = parseXML())
            return statement;
        if (auto filter = parsePipeText())
            return filter;
        if (auto nested = parseTag(true))
            return nested;
        return null;
    }

    /// Same as `parseNestedTags` but the tag may not use nested `:` content.
    NestedTags parseSingleTag()
    {
        if (auto comment = parseDoctype())
            return comment;
        if (auto statement = parseXML())
            return statement;
        if (auto filter = parsePipeText())
            return filter;
        if (auto nested = parseTag(false))
            return nested;
        return null;
    }

    /// Parses the rest of the current line into raw text, `#{}` / `!{}`
    /// interpolations and `#[]` inline tags. `\` escapes `!`, `\` and `#`.
    /// Returns: the text line, or null if nothing was consumed.
    TextLine parseTextLine()
    {
        // Returns a copy of the input positioned just after the bracket that
        // closes the one at the current front token.
        DietInput fastForward(string inc, string dec)
        {
            auto c = input.save;
            int depth = 0;
            do
            {
                if (c.front.content == inc)
                    depth++;
                else if (c.front.content == dec)
                    depth--;
                c.popFront;
            }
            while (depth > 0 && c.front.type != TokenType.eof);
            return c;
        }

        Token tok = input.front;
        TextLine.Part[] parts;
        string raw;
        size_t[2] rawRange = input.index;
        immutable size_t startIndex = input.front.range[0];

        // Emits the pending raw run (if any) as its own part.
        void flushRaw()
        {
            if (raw.length)
                parts ~= TextLine.Part(Token(TokenType.code, raw, rawRange), raw);
            raw = "";
        }

        while (!input.front.type.among!(TokenType.eof, TokenType.newline))
        {
            auto v = input.front;
            input.popFront;
            if (v.content == "\\")
            {
                v = input.front;
                input.popFront;
                if (!v.content.among!("!", "\\", "#"))
                    input.errors.expect(input, v.range[0], "escaped !, \\ or #");
                if (!raw.length)
                    rawRange[0] = v.range[0];
                raw ~= v.content;
                rawRange[1] = input.index;
            }
            else if (v.content == "!")
            {
                if (input.front.content == "{")
                {
                    flushRaw();
                    auto load = fastForward("{", "}");
                    // limit the code to the interpolation; `input = load` below
                    // replaces the truncated copy again
                    input.code.length = load.index;
                    auto start = v.range[0];
                    input.popFront();
                    auto expr = parseExpression;
                    auto end = input.index;
                    v.range = [start, end];
                    // `!{...}` is the non-escaping form: Part.toString renders
                    // escapeInlineExpr == false as `!`, so the flag must be false
                    // here (it used to be `true`, copied from the `#{` branch).
                    parts ~= TextLine.Part(v, null, expr, null, false);
                    input = load;
                }
                else
                {
                    if (!raw.length)
                        rawRange[0] = v.range[0];
                    raw ~= v.content;
                    rawRange[1] = input.index;
                }
            }
            else if (v.content == "#")
            {
                if (input.front.content == "{")
                {
                    flushRaw();
                    auto load = fastForward("{", "}");
                    input.code.length = load.index;
                    auto start = v.range[0];
                    input.popFront();
                    auto expr = parseExpression;
                    auto end = input.index;
                    v.range = [start, end];
                    parts ~= TextLine.Part(v, null, expr, null, true);
                    input = load;
                }
                else if (input.front.content == "[")
                {
                    flushRaw();
                    auto load = fastForward("[", "]");
                    input.code.length = load.index;
                    auto start = v.range[0];
                    input.popFront();
                    auto tag = parseSingleTag;
                    auto end = input.index;
                    v.range = [start, end];
                    parts ~= TextLine.Part(v, null, null, tag);
                    input = load;
                }
                else
                {
                    if (!raw.length)
                        rawRange[0] = v.range[0];
                    raw ~= v.content;
                    rawRange[1] = input.index;
                }
            }
            else if (v.type != TokenType.newline)
            {
                if (!raw.length)
                    rawRange[0] = v.range[0];
                raw ~= v.content;
                rawRange[1] = input.index;
            }
            else
                break;
            tok.range[1] = input.index;
        }
        flushRaw();
        if (!parts.length && startIndex == input.front.range[0])
            return null;
        return new TextLine(tok, parts);
    }

    /// Parses a `!!!` doctype shorthand into a `TagNode` named "doctype".
    TagNode parseDoctype()
    {
        auto tok = input.front;
        if (input.matchText("!!!"))
        {
            tok.content = "doctype";
            auto text = parseTextLine();
            return new TagNode(tok, text);
        }
        return null;
    }

    /// Parses a line starting with `<` as a text line wrapped in an `XMLNode`.
    XMLNode parseXML()
    {
        if (input.front.content == "<")
        {
            auto v = parseTextLine();
            return new XMLNode(v);
        }
        return null;
    }

    /// Parses `|`, an optional `&`, and the tag contents that follow.
    PipeText parsePipeText()
    {
        auto tok = input.front;
        if (tok.content == "|")
        {
            input.popFront;
            Token translated;
            if (input.front.content == "&")
            {
                translated = input.front;
                input.popFront;
            }
            return new PipeText(tok, translated, parseTagContents());
        }
        return null;
    }

    /// Collects an attribute name up to a top-level `,`, `=` or closing bracket.
    ///
    /// Brackets and double quotes seen in non-identifier tokens are tracked on
    /// a stack of expected closers so separators inside them do not end the name.
    /// Returns: a `TokenType.code` token with the concatenated content.
    Token parseAttributeIdentifier()
    {
        import std.array : array;
        import std.range : retro, chain;

        Token tok;
        tok.type = TokenType.code;
        tok.range = input.front.range;
        string ret;
        char[] stack;
        Loop: while (!input.empty)
        {
            auto front = input.front;
            if (front.type != TokenType.identifier)
            {
                switch (front.content)
                {
                case ",":
                case "=":
                    if (stack.length == 0)
                        break Loop;
                    break;
                case ")":
                case "]":
                case "}":
                    if (stack.length && stack[$ - 1] != front.content[0])
                    {
                        // report the still-open closers, innermost first
                        input.errors.expect(input, front.range[0],
                                "'" ~ (cast(char[])(cast(ubyte[]) stack).retro.chain.array).idup ~ "' before ')'");
                        stack.length = 0;
                    }
                    if (stack.length == 0)
                        break Loop;
                    stack.length--;
                    break;
                case "(":
                    stack ~= ')';
                    break;
                case "[":
                    stack ~= ']';
                    break;
                case "{":
                    stack ~= '}';
                    break;
                case "\"":
                    if (stack.length && stack[$ - 1] == '"')
                    {
                        stack.length--;
                    }
                    else
                    {
                        stack ~= '"';
                    }
                    break;
                default:
                    break;
                }
            }
            ret ~= front.content;
            input.popFront;
        }
        tok.range[1] = input.front.range[0];
        tok.content = ret;
        return tok;
    }

    /// Parses a code expression up to a top-level `,` or an unmatched closing
    /// bracket, tracking brackets and string/char literals (with `\` escapes).
    /// Returns: the expression, or null (with `input` restored) if more detent
    ///   than indent tokens were seen.
    Expression parseExpression()
    {
        import std.array : array;
        import std.range : retro, chain;

        auto save = input.save;
        auto tok = input.front;
        string ret;
        char[] stack;
        bool escape = false;
        int level = 0;
        Loop: while (!input.empty)
        {
            auto front = input.front;
            if (front.type == TokenType.indent)
                level++;
            else if (front.type == TokenType.detent)
                level--;
            if (level < 0)
            {
                input = save;
                return null;
            }
            if (front.type == TokenType.raw)
            {
                if (stack.length && stack[$ - 1].among!('"', '\''))
                {
                    // inside a string or character literal
                    if (escape)
                    {
                        escape = false;
                    }
                    else
                    {
                        if (front.content == "\\")
                            escape = true;
                        else if (front.content.length == 1 && front.content[0] == stack[$ - 1])
                            stack.length--;
                    }
                }
                else
                {
                    switch (front.content)
                    {
                    case ",":
                        if (stack.length == 0)
                            break Loop;
                        break;
                    case ")":
                    case "]":
                    case "}":
                        if (stack.length && stack[$ - 1] != front.content[0])
                        {
                            input.errors.expect(input, front.range[0],
                                    "'" ~ (cast(char[])(cast(ubyte[]) stack).retro.chain.array).idup ~ "' before ')'");
                            stack.length = 0;
                        }
                        if (stack.length == 0)
                            break Loop;
                        stack.length--;
                        break;
                    case "(":
                        stack ~= ')';
                        break;
                    case "[":
                        stack ~= ']';
                        break;
                    case "{":
                        stack ~= '}';
                        break;
                    case "\"":
                        stack ~= '"';
                        break;
                    case "'":
                        stack ~= '\'';
                        break;
                    default:
                        break;
                    }
                }
            }
            else
                escape = false;
            ret ~= front.content;
            input.popFront;
        }
        if (stack.length)
            input.errors.expect(input, input.front.range[0],
                    "'" ~ (cast(char[])(cast(ubyte[]) stack).retro.chain.array).idup ~ "' before ')'");
        tok.range[1] = input.front.range[0];
        tok.content = input.code[tok.range[0] .. tok.range[1]];
        return new Expression(tok, ret);
    }

    /// Parses a tag: optional name, `.class` / `#id` shorthands, `(...)`
    /// attributes, `<` / `>` / `&` modifiers and the content.
    /// Params:
    ///   allowNested = whether `tag: nested` content is accepted.
    /// Returns: the tag, or null (with `input` restored) if no tag starts here.
    TagNode parseTag(bool allowNested = true)
    {
        auto save = input.save;

        auto tok = input.front;

        Token[] classes, ids;

        // Consumes one `.name` or `#name` shorthand; returns false if none follows.
        bool parseClassOrID()
        {
            // Merges the `.` / `#` token with the identifier after it.
            Token combineIdentifier()
            {
                Token start = input.front;
                input.popFront;
                auto next = input.front;
                if (input.expect(TokenType.identifier))
                {
                    if (!validateIdentifier(next.content))
                        input.errors.expect(input, next.range[0], "identifier of type [-_0-9a-zA-Z]+");
                    start.content ~= next.content;
                    start.range[1] = next.range[1];
                }
                return start;
            }

            if (input.front.content == ".")
            {
                classes ~= combineIdentifier();
                return true;
            }
            else if (input.front.content == "#")
            {
                ids ~= combineIdentifier();
                return true;
            }
            else
                return false;
        }

        Token tag;
        bool match;
        if (tok.content == "." || tok.content == "#")
        {
            // no tag name given: synthesize an empty-range "div" tag token
            tag = tok;
            tag.range[1] = tag.range[0];
            tag.type = TokenType.identifier;
            tag.content = "div";
            while (parseClassOrID())
            {
            }
            match = classes.length > 0 || ids.length > 0;
        }
        else if (tok.type == TokenType.identifier)
        {
            input.popFront;
            tag = tok;
            if (!tok.content.validateTagIdentifier)
                input.errors.expect(input, tok.range[0], "identifier of type [-:_0-9a-zA-Z]+");
            while (parseClassOrID())
            {
            }
            match = true;
        }

        if (match)
        {
            auto ret = new TagNode(tag);
            ret.directIDs = ids;
            ret.directClasses = classes;
            ret.attributesRange[] = input.front.range[0];
            if (input.matchText("("))
            {
                ret.attributesRange[0]++;
                ret.attributesRange[1]++;
                auto lastValid = input.save;
                // don't store in lastValid after detent happened
                bool detented = false;
                TagNode.Attribute[] lastNonDetented;
                // NOTE(review): this flag is never set to true anywhere in this
                // function, so the reset after the attribute list always applies
                // when `detented` is set; confirm that this is intended.
                bool lastNonDetentedHadValue;
                if (input.peek(TokenType.identifier))
                {
                    lastValid = input.save;
                    bool errored;
                    while (input.front.content != ")")
                    {
                        input.skipAllWhiteGetDetent(detented);
                        const wasDetented = detented;
                        auto identifier = parseAttributeIdentifier();
                        Expression value;
                        bool validKey;
                        if (!identifier.content.length)
                            errored = true;
                        else if (!detented)
                        {
                            lastValid = input.save;
                            validKey = true;
                        }
                        input.skipAllWhiteGetDetent(detented);
                        bool empty;
                        if (input.matchText("="))
                        {
                            if (!detented)
                                lastValid = input.save;
                            input.skipAllWhiteGetDetent(detented);
                            value = parseExpression();
                            if (value !is null)
                            {
                                if (!detented)
                                    lastValid = input.save;
                            }
                            else
                                errored = true;
                        }
                        else if (input.matchText(","))
                        {
                            if (!detented)
                                lastValid = input.save;
                            input.skipAllWhiteGetDetent(detented);
                        }
                        else
                        {
                            if (!identifier.content.length)
                                break;
                            empty = true;
                        }

                        if (!errored)
                        {
                            if (!detented)
                                lastValid = input.save;
                            input.skipAllWhiteGetDetent(detented);
                        }

                        ret.attributes ~= TagNode.Attribute(identifier, value);
                        if (!detented)
                            lastNonDetented = ret.attributes;

                        if (!wasDetented && detented && validKey)
                        {
                            lastNonDetented = ret.attributes;
                            lastNonDetentedHadValue = false;
                        }

                        if (empty && !input.front.content.among!(")", ","))
                        {
                            errored = true;
                            break;
                        }

                        if (input.front.content == ",")
                            input.popFront();
                    }
                    ret.attributesRange[1] = input.front.range[0];
                    if (errored)
                    {
                        // rewind to the last good position but keep the errors
                        lastValid.errors = input.errors;
                        ret.attributes = lastNonDetented;
                        input = lastValid;
                    }
                }
                if (!input.expect(TokenType.raw, ")") && detented)
                {
                    lastValid.errors = input.errors;
                    ret.attributes = lastNonDetented;
                    input = lastValid;
                }
                if (detented && !lastNonDetentedHadValue && ret.attributes.length)
                    ret.attributes[$ - 1].expr = null;
            }
            if (input.front.content == "<")
            {
                ret._fitInside = input.front;
                input.popFront;
                // The grammar allows `FitInside? FitOutside?`, i.e. `<>` as well
                // as `><`; previously a `>` after `<` fell through into the tag
                // contents.
                if (input.front.content == ">")
                {
                    ret._fitOutside = input.front;
                    input.popFront;
                }
            }
            else if (input.front.content == ">")
            {
                auto tmp = input.front;
                ret._fitOutside = tmp;
                input.popFront;
                if (input.front.content == "<")
                {
                    tmp = input.front;
                    input.popFront;
                    ret._fitInside = tmp;
                }
            }
            if (input.front.content == "&")
            {
                ret._translated = input.front;
                input.popFront;
            }

            if (input.front.content == ".")
            {
                ret._contents = parseTextBlock();
            }
            else if (allowNested && input.front.content == ":")
            {
                input.popFront;
                input.skipAll(TokenType.whitespace);
                ret._contents = parseNestedTags();
            }
            else
            {
                ret._contents = parseTagContents();
            }

            ret._token.range[1] = input.indexEOL;
            return ret;
        }
        else
        {
            input = save;
            return null;
        }
    }

    /// Parses a `.` text block: the rest of the `.` line is skipped and the
    /// indented lines that follow become the content.
    /// Returns: the block or null if the input does not start with `.`.
    StringTagContents parseTextBlock()
    {
        Token front = input.front;
        if (front.content == ".")
        {
            input.popFront;
            string content;
            input.skipAll(TokenType.identifier, TokenType.raw, TokenType.whitespace);
            if (input.expect(TokenType.newline))
            {
                input.skipAll(TokenType.newline);
                if (input.expect(TokenType.indent))
                {
                    content = parseText(true, true);
                    input.expect(TokenType.detent);
                }
            }
            front.range[1] = input.indexEOL;
            return new StringTagContents(front, content);
        }
        return null;
    }

    /// Parses the inline content of a tag: an assignment, nothing (newline
    /// follows), or a text line after one optional whitespace token.
    TagContents parseTagContents()
    {
        if (auto assignment = parseAssignment())
            return assignment;
        if (input.peek(TokenType.newline))
            return null;
        input.match(TokenType.whitespace);
        return parseTextLine();
    }

    /// Parses `!= expr` (as `RawAssignment`) or `= expr` (as `Assignment`).
    /// Returns: the assignment or null if neither prefix is present.
    Assignment parseAssignment()
    {
        auto tok = input.front;
        if (input.matchText("!="))
        {
            string content = parseText(false, false);
            tok.content = "!=";
            tok.range[1] = input.front.range[0];
            return new RawAssignment(tok, content);
        }
        else if (input.matchText("="))
        {
            string content = parseText(false, false);
            tok.range[1] = input.front.range[0];
            return new Assignment(tok, content);
        }
        return null;
    }

    /// Searches for a path of AST nodes lying within the specified offset.
    /// Params:
    ///   offset = The cursor position to search AST nodes in.
    ///   inclusiveStart = true if an AST [1 .. 3] should be matched for index 1.
    ///   inclusiveEnd = true if an AST [1 .. 3] should be matched for index 3.
    /// Returns: A path of AST nodes starting at the broadest object (Document) down to the finest object.
    AST[] searchAST(size_t offset, bool inclusiveStart = true, bool inclusiveEnd = true)
    out(r; r.length > 0)
    {
        AST[] ret = [root];

        root.traverse((AST node, AST parent) {
            // the traversal left the subtree of the deepest match: stop
            if (ret[$ - 1] != parent)
                return VisitResult.return_;
            auto range = node.token.range;
            if (!offset.withinRange(range, inclusiveStart, inclusiveEnd))
                return VisitResult.continue_;
            ret ~= node;
            return VisitResult.recurse;
        });

        return ret;
    }
}

/// Checks whether `offset` lies inside `range`.
/// Params:
///   offset = the position to test.
///   range = `[start, end]` pair.
///   inclusiveStart = whether `offset == range[0]` counts as inside.
///   inclusiveEnd = whether `offset == range[1]` counts as inside.
bool withinRange(size_t offset, size_t[2] range, bool inclusiveStart = true, bool inclusiveEnd = true)
{
    if (inclusiveStart && inclusiveEnd)
        return offset >= range[0] && offset <= range[1];
    else if (inclusiveStart)
        return offset >= range[0] && offset < range[1];
    else if (inclusiveEnd)
        return offset > range[0] && offset <= range[1];
    else
        return offset > range[0] && offset < range[1];
}

/// Skips whitespace, detent, indent and newline tokens and sets `detented` to
/// true if the second counted type (detent) was skipped at least once.
/// `detented` is never reset to false here.
void skipAllWhiteGetDetent(ref DietInput input, ref bool detented)
{
    auto c = input.skipAllCount(TokenType.whitespace, TokenType.detent,
            TokenType.indent, TokenType.newline);
    if (c[1])
        detented = true;
}

// Test helper: asserts type and content of a token.
private void assertToken(Token token, TokenType type, string content)
{
    assert(token.type == type);
    assert(token.content == content);
}

// Test helper: asserts type, content and source range of a token.
private void assertToken(Token token, TokenType type, string content, size_t[2] range)
{
    assert(token.type == type);
    assert(token.content == content);
    assert(token.range == range);
}

// Two top-level tags.
unittest
{
    DietInput input;
    input.file = "stdin";
    // NOTE(review): written with escapes because whitespace is significant;
    // assumes the original literal ends with exactly two newlines and no
    // trailing indentation - confirm.
    input.code = "doctype html\nhtml\n\n";

    auto parser = new ASTParser;
    parser.input = input.save;
    parser.parseDocument();

    assert(parser.input.errors.length == 0);

    assert(parser.root);
    assert(parser.root.token.range == [0, 17]);
    assert(parser.root.children.length == 2);

    auto doctype = cast(TagNode) parser.root.children[0];
    auto html = cast(TagNode) parser.root.children[1];

    assert(doctype);
    assert(html);

    doctype.tag.assertToken(TokenType.identifier, "doctype");
    html.tag.assertToken(TokenType.identifier, "html");
}

// Nested class-shorthand tags with text content.
unittest
{
    DietInput input;
    input.file = "stdin";
    // The offsets asserted below require exactly one indentation character per
    // nested line; NOTE(review): assumed to be a tab - confirm.
    input.code = "foo\n\t.bar1 text1\n\t.bar2 text2\n";

    auto parser = new ASTParser;
    parser.input = input.save;
    parser.parseDocument();

    assert(parser.input.errors.length == 0);

    assert(parser.root);
    assert(parser.root.children.length == 1);
    auto root = cast(TagNode) parser.root.children[0];
    assert(root);
    root.tag.assertToken(TokenType.identifier, "foo");
    assert(root.children.length == 2);

    auto bar1 = cast(TagNode) root.children[0];
    auto bar2 = cast(TagNode) root.children[1];
    assert(bar1);
    assert(bar2);

    bar1.tag.assertToken(TokenType.identifier, "div", [5, 5]);
    bar2.tag.assertToken(TokenType.identifier, "div", [18, 18]);

    assert(cast(TextLine) bar1.contents,
            "Expected string contents but got " ~ bar1.contents.to!string);
    assert(cast(TextLine) bar2.contents,
            "Expected string contents but got " ~ bar2.contents.to!string);

    assert((cast(TextLine) bar1.contents)._parts.length == 1);
    assert((cast(TextLine) bar2.contents)._parts.length == 1);
    assert((cast(TextLine) bar1.contents)._parts[0].raw == "text1");
    assert((cast(TextLine) bar2.contents)._parts[0].raw == "text2");
}

// D statement and `#{}` interpolation.
unittest
{
    DietInput input;
    input.file = "stdin";
    // One indentation character per nested line (see the asserted offsets);
    // NOTE(review): assumed to be a tab - confirm.
    input.code = "foo\n\t- int item = 3;\n\tp #{item.foo} bar\n";

    auto parser = new ASTParser;
    parser.input = input.save;
    parser.parseDocument();

    assert(parser.input.errors.length == 0);

    assert(parser.root);
    assert(parser.root.children.length == 1);
    auto root = cast(TagNode) parser.root.children[0];
    assert(root);
    root.tag.assertToken(TokenType.identifier, "foo");
    assert(root.children.length == 2);

    auto code = cast(DStatement) root.children[0];
    auto paragraph = cast(TagNode) root.children[1];
    assert(code);
    assert(paragraph);

    assert(code.content == " int item = 3;");
    paragraph.tag.assertToken(TokenType.identifier, "p");

    auto content = cast(TextLine) paragraph.contents;
    assert(content);
    assert(content._parts.length == 2);
    assert(content._parts[0].inlineExpr);
    assert(content._parts[0].inlineExpr.token.range[0] == 26);
    assert(content._parts[0].inlineExpr.token.range[1] == 34);
    assert(content._parts[0].inlineExpr.token.content == "item.foo");
    assert(content._parts[0].inlineExpr.content == "item.foo");
    assert(content._parts[1].raw == " bar");
}

// Hidden comment.
unittest
{
    DietInput input;
    input.file = "stdin";
    input.code = `//-foo`;

    auto parser = new ASTParser;
    parser.input = input.save;
    parser.parseDocument();

    assert(parser.input.errors.length == 0);

    assert(parser.root);
    assert(parser.root.children.length == 1);
    auto root = cast(HiddenComment) parser.root.children[0];
    assert(root);
    assertToken(root.token, TokenType.raw, "//-");
    assert(root.content == "foo");
}

// D statement without a space after the dash.
unittest
{
    DietInput input;
    input.file = "stdin";
    input.code = `-foo`;

    auto parser = new ASTParser;
    parser.input = input.save;
    parser.parseDocument();

    assert(parser.input.errors.length == 0);

    assert(parser.root);
    assert(parser.root.children.length == 1);
    auto root = cast(DStatement) parser.root.children[0];
    assert(root);
    assertToken(root.token, TokenType.raw, "-");
    assert(root.content == "foo");
}