Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 13bc8af

Browse files
committed Apr 21, 2015
Model lexer: Fix remaining issues
1 parent e5e343a commit 13bc8af

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+102
-179
lines changed
 

‎src/grammar/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ javac *.java
1212
rustc -O verify.rs
1313
for file in ../*/**.rs; do
1414
echo $file;
15-
grun RustLexer tokens -tokens < $file | ./verify $file RustLexer.tokens || break
15+
grun RustLexer tokens -tokens < "$file" | ./verify "$file" RustLexer.tokens || break
1616
done
1717
```
1818

‎src/grammar/RustLexer.g4

Lines changed: 47 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
lexer grammar RustLexer;
22

3+
@lexer::members {
4+
public boolean is_at(int pos) {
5+
return _input.index() == pos;
6+
}
7+
}
8+
9+
310
tokens {
411
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUT,
512
MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
@@ -8,7 +15,7 @@ tokens {
815
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR,
916
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY,
1017
LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
11-
COMMENT
18+
COMMENT, SHEBANG
1219
}
1320

1421
import xidstart , xidcontinue;
@@ -86,94 +93,63 @@ fragment CHAR_ESCAPE
8693
| [xX] HEXIT HEXIT
8794
| 'u' HEXIT HEXIT HEXIT HEXIT
8895
| 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
96+
| 'u{' HEXIT '}'
97+
| 'u{' HEXIT HEXIT '}'
98+
| 'u{' HEXIT HEXIT HEXIT '}'
99+
| 'u{' HEXIT HEXIT HEXIT HEXIT '}'
100+
| 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT '}'
101+
| 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT '}'
89102
;
90103
91104
fragment SUFFIX
92105
: IDENT
93106
;
94107
108+
fragment INTEGER_SUFFIX
109+
: { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX
110+
;
111+
95112
LIT_CHAR
96-
: '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] | '\ud800' .. '\udbff' '\udc00' .. '\udfff' ) '\'' SUFFIX?
113+
: '\'' ( '\\' CHAR_ESCAPE
114+
| ~[\\'\n\t\r]
115+
| '\ud800' .. '\udbff' '\udc00' .. '\udfff'
116+
)
117+
'\'' SUFFIX?
97118
;
98119

99120
LIT_BYTE
100-
: 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' SUFFIX?
121+
: 'b\'' ( '\\' ( [xX] HEXIT HEXIT
122+
| [nrt\\'"0] )
123+
| ~[\\'\n\t\r] '\udc00'..'\udfff'?
124+
)
125+
'\'' SUFFIX?
101126
;
102127

103128
LIT_INTEGER
104-
: [0-9][0-9_]* SUFFIX?
105-
| '0b' [01][01_]* SUFFIX?
106-
| '0o' [0-7][0-7_]* SUFFIX?
107-
| '0x' [0-9a-fA-F][0-9a-fA-F_]* SUFFIX?
129+
130+
: [0-9][0-9_]* INTEGER_SUFFIX?
131+
| '0b' [01_]+ INTEGER_SUFFIX?
132+
| '0o' [0-7_]+ INTEGER_SUFFIX?
133+
| '0x' [0-9a-fA-F_]+ INTEGER_SUFFIX?
108134
;
109135

110136
LIT_FLOAT
111137
: [0-9][0-9_]* ('.' {
112-
/* dot followed by another dot is a range, no float */
138+
/* dot followed by another dot is a range, not a float */
113139
_input.LA(1) != '.' &&
114-
/* dot followed by an identifier is an integer with a function call, no float */
140+
/* dot followed by an identifier is an integer with a function call, not a float */
115141
_input.LA(1) != '_' &&
116-
_input.LA(1) != 'a' &&
117-
_input.LA(1) != 'b' &&
118-
_input.LA(1) != 'c' &&
119-
_input.LA(1) != 'd' &&
120-
_input.LA(1) != 'e' &&
121-
_input.LA(1) != 'f' &&
122-
_input.LA(1) != 'g' &&
123-
_input.LA(1) != 'h' &&
124-
_input.LA(1) != 'i' &&
125-
_input.LA(1) != 'j' &&
126-
_input.LA(1) != 'k' &&
127-
_input.LA(1) != 'l' &&
128-
_input.LA(1) != 'm' &&
129-
_input.LA(1) != 'n' &&
130-
_input.LA(1) != 'o' &&
131-
_input.LA(1) != 'p' &&
132-
_input.LA(1) != 'q' &&
133-
_input.LA(1) != 'r' &&
134-
_input.LA(1) != 's' &&
135-
_input.LA(1) != 't' &&
136-
_input.LA(1) != 'u' &&
137-
_input.LA(1) != 'v' &&
138-
_input.LA(1) != 'w' &&
139-
_input.LA(1) != 'x' &&
140-
_input.LA(1) != 'y' &&
141-
_input.LA(1) != 'z' &&
142-
_input.LA(1) != 'A' &&
143-
_input.LA(1) != 'B' &&
144-
_input.LA(1) != 'C' &&
145-
_input.LA(1) != 'D' &&
146-
_input.LA(1) != 'E' &&
147-
_input.LA(1) != 'F' &&
148-
_input.LA(1) != 'G' &&
149-
_input.LA(1) != 'H' &&
150-
_input.LA(1) != 'I' &&
151-
_input.LA(1) != 'J' &&
152-
_input.LA(1) != 'K' &&
153-
_input.LA(1) != 'L' &&
154-
_input.LA(1) != 'M' &&
155-
_input.LA(1) != 'N' &&
156-
_input.LA(1) != 'O' &&
157-
_input.LA(1) != 'P' &&
158-
_input.LA(1) != 'Q' &&
159-
_input.LA(1) != 'R' &&
160-
_input.LA(1) != 'S' &&
161-
_input.LA(1) != 'T' &&
162-
_input.LA(1) != 'U' &&
163-
_input.LA(1) != 'V' &&
164-
_input.LA(1) != 'W' &&
165-
_input.LA(1) != 'X' &&
166-
_input.LA(1) != 'Y' &&
167-
_input.LA(1) != 'Z'
142+
!(_input.LA(1) >= 'a' && _input.LA(1) <= 'z') &&
143+
!(_input.LA(1) >= 'A' && _input.LA(1) <= 'Z')
168144
}? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
169145
;
170146

171147
LIT_STR
172148
: '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
173149
;
174150

175-
LIT_BINARY : 'b' LIT_STR SUFFIX?;
176-
LIT_BINARY_RAW : 'rb' LIT_STR_RAW SUFFIX?;
151+
LIT_BINARY : 'b' LIT_STR ;
152+
LIT_BINARY_RAW : 'b' LIT_STR_RAW ;
177153

178154
/* this is a bit messy */
179155

@@ -201,13 +177,19 @@ LIFETIME : '\'' IDENT ;
201177

202178
WHITESPACE : [ \r\n\t]+ ;
203179

204-
UNDOC_COMMENT : '////' ~[\r\n]* -> type(COMMENT) ;
180+
UNDOC_COMMENT : '////' ~[\n]* -> type(COMMENT) ;
205181
YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
206182
OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
207-
LINE_COMMENT : '//' ~[\r\n]* -> type(COMMENT) ;
183+
LINE_COMMENT : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;
208184

209185
DOC_BLOCK_COMMENT
210186
: ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
211187
;
212188

213189
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
190+
191+
/* these appear at the beginning of a file */
192+
193+
SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
194+
195+
UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;

0 commit comments

Comments
 (0)