1
1
lexer grammar RustLexer;
2
2
3
+ @lexer::members {
4
+ public boolean is_at(int pos) {
5
+ return _input.index() == pos;
6
+ }
7
+ }
8
+
9
+
3
10
tokens {
4
11
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
5
12
MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
@@ -8,7 +15,7 @@ tokens {
8
15
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR,
9
16
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY,
10
17
LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
11
- COMMENT
18
+ COMMENT, SHEBANG
12
19
}
13
20
14
21
import xidstart , xidcontinue;
@@ -86,94 +93,63 @@ fragment CHAR_ESCAPE
86
93
| [xX] HEXIT HEXIT
87
94
| ' u' HEXIT HEXIT HEXIT HEXIT
88
95
| ' U ' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
96
+ | ' u{' HEXIT ' } '
97
+ | ' u{' HEXIT HEXIT ' } '
98
+ | ' u{' HEXIT HEXIT HEXIT ' } '
99
+ | ' u{' HEXIT HEXIT HEXIT HEXIT ' } '
100
+ | ' u{' HEXIT HEXIT HEXIT HEXIT HEXIT ' } '
101
+ | ' u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT ' } '
89
102
;
90
103
91
104
fragment SUFFIX
92
105
: IDENT
93
106
;
94
107
108
+ fragment INTEGER_SUFFIX
109
+ : { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX
110
+ ;
111
+
95
112
LIT_CHAR
96
- : ' \' ' ( ' \\ ' CHAR_ESCAPE | ~[\\' \n\t\r ] | ' \ud800' .. ' \udbff' ' \udc00' .. ' \udfff' ) ' \' ' SUFFIX ?
113
+ : ' \' ' ( ' \\ ' CHAR_ESCAPE
114
+ | ~[\\' \n\t\r ]
115
+ | ' \ud800' .. ' \udbff' ' \udc00' .. ' \udfff'
116
+ )
117
+ ' \' ' SUFFIX ?
97
118
;
98
119
99
120
LIT_BYTE
100
- : ' b\' ' ( ' \\ ' ( [xX] HEXIT HEXIT | [nrt\\' "0] ) | ~[\\ ' \n\t\r] ) ' \' ' SUFFIX ?
121
+ : ' b\' ' ( ' \\ ' ( [xX] HEXIT HEXIT
122
+ | [nrt\\' "0] )
123
+ | ~[\\ ' \n\t\r] ' \udc00 ' ..' \udfff ' ?
124
+ )
125
+ ' \' ' SUFFIX ?
101
126
;
102
127
103
128
LIT_INTEGER
104
- : [0-9][0-9_]* SUFFIX ?
105
- | ' 0b' [01][01_]* SUFFIX ?
106
- | ' 0o' [0-7][0-7_]* SUFFIX ?
107
- | ' 0x' [0-9a-fA-F ][0-9a-fA-F_ ]* SUFFIX ?
129
+
130
+ : [0-9][0-9_]* INTEGER_SUFFIX ?
131
+ | ' 0b' [01_]+ INTEGER_SUFFIX ?
132
+ | ' 0o' [0-7_]+ INTEGER_SUFFIX ?
133
+ | ' 0x' [0-9a-fA-F_ ]+ INTEGER_SUFFIX ?
108
134
;
109
135
110
136
LIT_FLOAT
111
137
: [0-9][0-9_]* (' .' {
112
- /* dot followed by another dot is a range, no float */
138
+ /* dot followed by another dot is a range, not a float */
113
139
_input.LA(1 ) != ' .' &&
114
- /* dot followed by an identifier is an integer with a function call, no float */
140
+ /* dot followed by an identifier is an integer with a function call, not a float */
115
141
_input.LA(1 ) != ' _' &&
116
- _input.LA(1 ) != ' a' &&
117
- _input.LA(1 ) != ' b' &&
118
- _input.LA(1 ) != ' c' &&
119
- _input.LA(1 ) != ' d' &&
120
- _input.LA(1 ) != ' e' &&
121
- _input.LA(1 ) != ' f' &&
122
- _input.LA(1 ) != ' g' &&
123
- _input.LA(1 ) != ' h' &&
124
- _input.LA(1 ) != ' i' &&
125
- _input.LA(1 ) != ' j' &&
126
- _input.LA(1 ) != ' k' &&
127
- _input.LA(1 ) != ' l' &&
128
- _input.LA(1 ) != ' m' &&
129
- _input.LA(1 ) != ' n' &&
130
- _input.LA(1 ) != ' o' &&
131
- _input.LA(1 ) != ' p' &&
132
- _input.LA(1 ) != ' q' &&
133
- _input.LA(1 ) != ' r' &&
134
- _input.LA(1 ) != ' s' &&
135
- _input.LA(1 ) != ' t' &&
136
- _input.LA(1 ) != ' u' &&
137
- _input.LA(1 ) != ' v' &&
138
- _input.LA(1 ) != ' w' &&
139
- _input.LA(1 ) != ' x' &&
140
- _input.LA(1 ) != ' y' &&
141
- _input.LA(1 ) != ' z' &&
142
- _input.LA(1 ) != ' A' &&
143
- _input.LA(1 ) != ' B' &&
144
- _input.LA(1 ) != ' C' &&
145
- _input.LA(1 ) != ' D' &&
146
- _input.LA(1 ) != ' E' &&
147
- _input.LA(1 ) != ' F' &&
148
- _input.LA(1 ) != ' G' &&
149
- _input.LA(1 ) != ' H' &&
150
- _input.LA(1 ) != ' I' &&
151
- _input.LA(1 ) != ' J' &&
152
- _input.LA(1 ) != ' K' &&
153
- _input.LA(1 ) != ' L' &&
154
- _input.LA(1 ) != ' M' &&
155
- _input.LA(1 ) != ' N' &&
156
- _input.LA(1 ) != ' O' &&
157
- _input.LA(1 ) != ' P' &&
158
- _input.LA(1 ) != ' Q' &&
159
- _input.LA(1 ) != ' R' &&
160
- _input.LA(1 ) != ' S' &&
161
- _input.LA(1 ) != ' T' &&
162
- _input.LA(1 ) != ' U' &&
163
- _input.LA(1 ) != ' V' &&
164
- _input.LA(1 ) != ' W' &&
165
- _input.LA(1 ) != ' X' &&
166
- _input.LA(1 ) != ' Y' &&
167
- _input.LA(1 ) != ' Z'
142
+ !(_input.LA(1 ) >= ' a' && _input.LA(1 ) <= ' z' ) &&
143
+ !(_input.LA(1 ) >= ' A' && _input.LA(1 ) <= ' Z' )
168
144
} ? | (' .' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX ?)
169
145
;
170
146
171
147
LIT_STR
172
148
: ' "' (' \\\n ' | ' \\\r\n ' | ' \\ ' CHAR_ESCAPE | .)*? ' "' SUFFIX ?
173
149
;
174
150
175
- LIT_BINARY : ' b' LIT_STR SUFFIX ? ;
176
- LIT_BINARY_RAW : ' rb ' LIT_STR_RAW SUFFIX ? ;
151
+ LIT_BINARY : ' b' LIT_STR ;
152
+ LIT_BINARY_RAW : 'b' LIT_STR_RAW ;
177
153
178
154
/* this is a bit messy */
179
155
@@ -201,13 +177,19 @@ LIFETIME : '\'' IDENT ;
201
177
202
178
WHITESPACE : [ \r\n\t]+ ;
203
179
204
- UNDOC_COMMENT : ' ////' ~[\r\ n]* -> type(COMMENT ) ;
180
+ UNDOC_COMMENT : ' ////' ~[\n]* -> type(COMMENT ) ;
205
181
YESDOC_COMMENT : ' ///' ~[\r\n]* -> type(DOC_COMMENT ) ;
206
182
OUTER_DOC_COMMENT : ' //!' ~[\r\n]* -> type(DOC_COMMENT ) ;
207
- LINE_COMMENT : ' //' ~[\r \n]* -> type(COMMENT ) ;
183
+ LINE_COMMENT : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;
208
184
209
185
DOC_BLOCK_COMMENT
210
186
: (' /**' ~[*] | ' /*!' ) (DOC_BLOCK_COMMENT | .)*? ' */' -> type(DOC_COMMENT )
211
187
;
212
188
213
189
BLOCK_COMMENT : ' /*' (BLOCK_COMMENT | .)*? ' */' -> type(COMMENT ) ;
190
+
191
+ /* these appear at the beginning of a file */
192
+
193
+ SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
194
+
195
+ UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;
0 commit comments