import 'package:string_scanner/string_scanner.dart';

import '../ast/ast.dart';

final RegExp _whitespace = new RegExp(r'[ \n\r\t]+');
final RegExp _string1 = new RegExp(
    r"'((\\(['\\/bfnrt]|(u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])))|([^'\\]))*'");
final RegExp _string2 = new RegExp(
    r'"((\\(["\\/bfnrt]|(u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])))|([^"\\]))*"');

/// Scans [text] and returns a [Scanner] holding the resulting tokens.
Scanner scan(String text, {sourceUrl}) => new _Scanner(text, sourceUrl)..scan();

abstract class Scanner {
  List get errors;
  List<Token> get tokens;
}

/// Patterns recognized while scanning HTML markup.
final Map<Pattern, TokenType> _htmlPatterns = {
  '{{': TokenType.doubleCurlyL,
  '{{-': TokenType.doubleCurlyL,
  // new RegExp(r''): TokenType.htmlComment,
  '!DOCTYPE': TokenType.doctype,
  '!doctype': TokenType.doctype,
  '<': TokenType.lt,
  '>': TokenType.gt,
  '/': TokenType.slash,
  '=': TokenType.equals,
  _string1: TokenType.string,
  _string2: TokenType.string,
  new RegExp(r'<script[^>]*>[^$]*</script>'): TokenType.script_tag,
  new RegExp(r'([A-Za-z][A-Za-z0-9]*-)*([A-Za-z][A-Za-z0-9]*)'): TokenType.id,
};

/// Patterns recognized while scanning `{{ ... }}` and attribute expressions.
final Map<Pattern, TokenType> _expressionPatterns = {
  '}}': TokenType.doubleCurlyR,

  // Keywords
  'new': TokenType.$new,

  // Misc.
  '*': TokenType.asterisk,
  ':': TokenType.colon,
  ',': TokenType.comma,
  '.': TokenType.dot,
  '=': TokenType.equals,
  '-': TokenType.minus,
  '%': TokenType.percent,
  '+': TokenType.plus,
  '[': TokenType.lBracket,
  ']': TokenType.rBracket,
  '{': TokenType.lCurly,
  '}': TokenType.rCurly,
  '(': TokenType.lParen,
  ')': TokenType.rParen,
  '/': TokenType.slash,
  '<': TokenType.lt,
  '<=': TokenType.lte,
  '>': TokenType.gt,
  '>=': TokenType.gte,
  '==': TokenType.equ,
  '!=': TokenType.nequ,
  new RegExp(r'-?[0-9]+(\.[0-9]+)?([Ee][0-9]+)?'): TokenType.number,
  new RegExp(r'0x[A-Fa-f0-9]+'): TokenType.hex,
  _string1: TokenType.string,
  _string2: TokenType.string,
  new RegExp('[A-Za-z_\\\$][A-Za-z0-9_\\\$]*'): TokenType.id,
};

class _Scanner implements Scanner {
  final List errors = [];
  final List<Token> tokens = [];

  SpanScanner _scanner;

  _Scanner(String text, sourceUrl) {
    _scanner = new SpanScanner(text, sourceUrl: sourceUrl);
  }

  /// Tries every pattern in [patterns] at the current position.
  ///
  /// If anything matches, first emits a pending text token (spanning from
  /// [textStart], if given), then emits and returns the longest match and
  /// advances past it. Returns `null` if nothing matched.
  Token _scanFrom(Map<Pattern, TokenType> patterns,
      [LineScannerState textStart]) {
    var potential = <Token>[];

    patterns.forEach((pattern, type) {
      if (_scanner.matches(pattern)) {
        potential.add(new Token(type, _scanner.lastSpan));
      }
    });

    if (potential.isEmpty) return null;

    if (textStart != null) {
      var span = _scanner.spanFrom(textStart);
      tokens.add(new Token(TokenType.text, span));
    }

    // Prefer the longest match.
    potential.sort((a, b) => b.span.length.compareTo(a.span.length));
    var token = potential.first;
    tokens.add(token);
    _scanner.scan(token.span.text);
    return token;
  }

  void scan() {
    while (!_scanner.isDone) scanHtmlTokens();
  }

  void scanHtmlTokens() {
    LineScannerState textStart;

    while (!_scanner.isDone) {
      var state = _scanner.state;

      // Skip whitespace unless we are in the middle of collecting text.
      if (textStart == null) {
        _scanner.scan(_whitespace);
      }

      var lastToken = _scanFrom(_htmlPatterns, textStart);

      if (lastToken?.type == TokenType.equals) {
        textStart = null;
        scanExpressionTokens();
        return;
      } else if (lastToken?.type == TokenType.doubleCurlyL) {
        textStart = null;
        scanExpressionTokens(true);
        return;
      } else if (lastToken?.type == TokenType.id &&
          tokens.length >= 2 &&
          tokens[tokens.length - 2].type == TokenType.gt) {
        // Fold in the ID into a text node...
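        //
        // An identifier that immediately follows `>` is the start of raw text
        // content rather than a tag name, so drop the id token and record the
        // current position as the beginning of a text span.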
        tokens.removeLast();
        textStart = state;
      } else if (lastToken?.type == TokenType.id &&
          tokens.length >= 2 &&
          tokens[tokens.length - 2].type == TokenType.text) {
        // Append the ID into the old text node.
        tokens.removeLast();
        tokens.removeLast();

        // Not sure how, but the following logic seems to occur
        // automatically:
        //
        // var textToken = tokens.removeLast();
        // var newSpan = textToken.span.expand(lastToken.span);
        // tokens.add(new Token(TokenType.text, newSpan));
      } else if (lastToken != null) {
        textStart = null;
      } else if (!_scanner.isDone) {
        // Nothing matched: treat this character as part of a text run.
        textStart ??= state;
        _scanner.readChar();
      }
    }

    if (textStart != null) {
      var span = _scanner.spanFrom(textStart);
      tokens.add(new Token(TokenType.text, span));
    }
  }

  /// Scans expression tokens, stopping on `}}`, on `>` unless [allowGt] is
  /// set, or when nothing more matches.
  void scanExpressionTokens([bool allowGt = false]) {
    Token lastToken;
    do {
      _scanner.scan(_whitespace);
      lastToken = _scanFrom(_expressionPatterns);
    } while (!_scanner.isDone &&
        lastToken != null &&
        lastToken.type != TokenType.doubleCurlyR &&
        (allowGt || lastToken.type != TokenType.gt));
  }
}
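// Example usage (illustrative sketch): assuming the `Token` type exported by
// '../ast/ast.dart' exposes `type` and `span`, scanning a small template and
// printing its tokens might look like:
//
//     var result = scan('<div title="x">{{ user.name }}</div>');
//     for (var token in result.tokens) {
//       print('${token.type}: "${token.span.text}"');
//     }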