import 'package:string_scanner/string_scanner.dart';
import '../ast/ast.dart';
/// Matches a run of one or more spaces, line feeds, carriage returns, or tabs.
final RegExp _whitespace = new RegExp(r'[ \n\r\t]+');
// NOTE(review): the pattern arguments of the two `RegExp` constructors below
// are not visible in this view. Both regexes are registered under
// `TokenType.string` in `_htmlPatterns` and `_expressionPatterns`; presumably
// one handles single-quoted and the other double-quoted literals -- confirm.
final RegExp _string1 = new RegExp(
final RegExp _string2 = new RegExp(
/// Tokenizes [text] and returns the scanner holding the results.
///
/// A `_Scanner` is created over [text] with [sourceUrl] forwarded to it, its
/// `scan()` method is run once, and that same instance is returned so callers
/// can read its `tokens` and `errors`.
Scanner scan(String text, {sourceUrl}) {
  var scanner = new _Scanner(text, sourceUrl);
  scanner.scan();
  return scanner;
}
/// Read-only view of the results of a scan, as returned by the top-level
/// `scan` function.
abstract class Scanner {
/// The [JaelError] list exposed by the implementation.
List<JaelError> get errors;
/// The [Token] list exposed by the implementation; `_Scanner` appends to its
/// list while scanning.
List<Token> get tokens;
/// Patterns tried while scanning HTML markup, each keyed to the [TokenType]
/// it produces.
///
/// `scanHtmlTokens` passes this table to `_scanFrom`, which tests every entry
/// at the current position and prefers the longest match. Because of that
/// longest-match rule, the multi-character regexes below must not match more
/// than one construct at a time.
final Map<Pattern, TokenType> _htmlPatterns = {
'{{': TokenType.doubleCurlyL,
'{{-': TokenType.doubleCurlyL,
// Lazily match up to the first `-->`. `[\s\S]` accepts any character
// (including newlines and `$`), and `*?` stops at the nearest terminator so
// two comments in one document stay two separate tokens.
new RegExp(r'<!--[\s\S]*?-->'): TokenType.htmlComment,
'!DOCTYPE': TokenType.doctype,
'!doctype': TokenType.doctype,
'<': TokenType.lt,
'>': TokenType.gt,
'/': TokenType.slash,
'=': TokenType.equals,
_string1: TokenType.string,
_string2: TokenType.string,
// Same reasoning as for comments: stop at the first `</script>` and allow
// `$` inside the script body.
new RegExp(r'<script[^>]*>[\s\S]*?</script>'): TokenType.script_tag,
// One or more alphanumeric segments (each starting with a letter) joined by
// single hyphens, e.g. `data-foo`.
new RegExp(r'([A-Za-z][A-Za-z0-9]*-)*([A-Za-z][A-Za-z0-9]*)'): TokenType.id,
/// Patterns tried while scanning an expression, each keyed to the [TokenType]
/// it produces.
///
/// `scanExpressionTokens` passes this table to `_scanFrom`, which tests every
/// entry at the current position and prefers the longest match (so `<=` wins
/// over `<`, and `==` over `=`). Each key is listed exactly once.
final Map<Pattern, TokenType> _expressionPatterns = {
'}}': TokenType.doubleCurlyR,
// Keywords
'new': TokenType.$new,
// Misc.
'*': TokenType.asterisk,
':': TokenType.colon,
',': TokenType.comma,
'.': TokenType.dot,
'=': TokenType.equals,
'-': TokenType.minus,
'%': TokenType.percent,
'+': TokenType.plus,
'[': TokenType.lBracket,
']': TokenType.rBracket,
'{': TokenType.lCurly,
'}': TokenType.rCurly,
'(': TokenType.lParen,
')': TokenType.rParen,
'/': TokenType.slash,
'<': TokenType.lt,
'<=': TokenType.lte,
'>': TokenType.gt,
'>=': TokenType.gte,
'==': TokenType.equ,
'!=': TokenType.nequ,
// Optional leading minus, integer part, optional fraction, optional unsigned
// exponent.
new RegExp(r'-?[0-9]+(\.[0-9]+)?([Ee][0-9]+)?'): TokenType.number,
// `0x` followed by one or more hex digits.
new RegExp(r'0x[A-Fa-f0-9]+'): TokenType.hex,
_string1: TokenType.string,
_string2: TokenType.string,
// Identifier: a letter, `_` or `$`, then any number of letters, digits, `_`
// or `$`.
new RegExp('[A-Za-z_\\\$][A-Za-z0-9_\\\$]*'): TokenType.id,
/// [Scanner] implementation that drives a [SpanScanner] over the input text.
class _Scanner implements Scanner {
// Starts empty; nothing in the visible code appends to it.
final List<JaelError> errors = [];
// Filled in scan order by the scanning methods below.
final List<Token> tokens = [];
// Underlying cursor over the source text; assigned in the constructor.
SpanScanner _scanner;
/// Creates a scanner over [text], forwarding [sourceUrl] to the underlying
/// [SpanScanner].
_Scanner(String text, sourceUrl) {
_scanner = new SpanScanner(text, sourceUrl: sourceUrl);
/// Tries every pattern in [patterns] at the scanner's current position and
/// returns the candidate with the longest span, or `null` when none match.
///
/// When [textStart] is non-null and at least one pattern matched, a
/// `TokenType.text` token spanning from [textStart] to the current position
/// is appended to [tokens] before the winner is chosen.
///
/// NOTE(review): none of the lines visible here consume the winning token or
/// append it to [tokens] -- confirm how the scanner advances past a match.
Token _scanFrom(Map<Pattern, TokenType> patterns,
[LineScannerState textStart]) {
var potential = <Token>[];
// Collect one candidate per pattern that matches at the current position;
// `lastSpan` is the span of the match just tested.
patterns.forEach((pattern, type) {
if (_scanner.matches(pattern))
potential.add(new Token(type, _scanner.lastSpan));
if (potential.isEmpty) return null;
if (textStart != null) {
// Flush the pending text run, which extends up to the current position.
var span = _scanner.spanFrom(textStart);
tokens.add(new Token(TokenType.text, span));
// Descending span length: the longest match ends up first.
potential.sort((a, b) => b.span.length.compareTo(a.span.length));
var token = potential.first;
return token;
/// Calls [scanHtmlTokens] repeatedly until the underlying scanner reports
/// that it is done.
void scan() {
while (!_scanner.isDone) scanHtmlTokens();
/// Scans in HTML mode, matching against [_htmlPatterns] while input remains.
///
/// `textStart` records where a pending run of plain text began, or is `null`
/// when no run is open. It is passed to [_scanFrom] so that the run can be
/// flushed as a `TokenType.text` token when a pattern matches, and any run
/// still open after the loop is flushed at the end.
void scanHtmlTokens() {
LineScannerState textStart;
while (!_scanner.isDone) {
// Position before this iteration's scan; used as the start of a text run.
var state = _scanner.state;
// Skip whitespace conditionally
if (textStart == null) {
var lastToken = _scanFrom(_htmlPatterns, textStart);
if (lastToken?.type == TokenType.equals) {
textStart = null;
} else if (lastToken?.type == TokenType.doubleCurlyL) {
textStart = null;
} else if (lastToken?.type == TokenType.id &&
tokens.length >= 2 &&
tokens[tokens.length - 2].type == TokenType.gt) {
// Fold in the ID into a text node...
textStart = state;
} else if (lastToken?.type == TokenType.id &&
tokens.length >= 2 &&
tokens[tokens.length - 2].type == TokenType.text) {
// Append the ID into the old text node
// Not sure how, but the following logic seems to occur
// automatically:
//var textToken = tokens.removeLast();
//var newSpan = textToken.span.expand(lastToken.span);
//tokens.add(new Token(TokenType.text, newSpan));
} else if (lastToken != null) {
textStart = null;
// Every non-null token was handled by the branch above, so `lastToken` is
// known to be null here. The previous `?? lastToken == null` fallback could
// never run because `!_scanner.isDone` is never null.
} else if (!_scanner.isDone) {
// Nothing matched: open a text run here unless one is already open.
textStart ??= state;
// Flush a text run that is still open, up to the current position.
if (textStart != null) {
var span = _scanner.spanFrom(textStart);
tokens.add(new Token(TokenType.text, span));
/// Scans expression tokens with [_expressionPatterns] until a stop condition
/// is reached.
///
/// The loop ends when the input is exhausted, when no pattern matches, when a
/// `}}` token was scanned, or -- unless [allowGt] is true -- when a `>` token
/// was scanned.
void scanExpressionTokens([bool allowGt = false]) {
Token lastToken;
do {
lastToken = _scanFrom(_expressionPatterns);
} while (!_scanner.isDone &&
lastToken != null &&
lastToken.type != TokenType.doubleCurlyR &&
// With allowGt set, `>` is an ordinary token rather than a terminator.
(allowGt || lastToken.type != TokenType.gt));