Skip to content
This repository was archived by the owner on Dec 15, 2022. It is now read-only.

Commit bce9af4

Browse files
Authored by: Wliu
Merge pull request #70 from chbk/punctuation
Tokenize punctuation
2 parents a31e1e1 + 6a6101d commit bce9af4

File tree

2 files changed

+167
-61
lines changed

2 files changed

+167
-61
lines changed

grammars/sql.cson

+73-30
Original file line numberDiff line numberDiff line change
@@ -87,49 +87,53 @@
8787
'name': 'meta.alter.sql'
8888
}
8989
{
90+
'match': '(?i)\\b(bigserial|boolean|box|bytea|cidr|circle|date|datetime|datetime2|double\\s+precision|enum|inet|integer|interval|line|lseg|macaddr|money|oid|path|point|polygon|real|serial|sysdate|text|uniqueidentifier)\\b'
91+
'name': 'storage.type.sql'
92+
}
93+
{
94+
'match': '(?i)\\b(bigint|bit(?:\\s+varying)?|n?char|character(?:\\s+varying)?|float|int|number|smallint|time(?:stamp)?tz|tinyint|n?varchar\\d?)\\b(?:\\s*(\\()\\s*(\\d*)\\s*(\\)))?'
9095
'captures':
9196
'1':
9297
'name': 'storage.type.sql'
9398
'2':
94-
'name': 'storage.type.sql'
99+
'name': 'punctuation.definition.parameters.bracket.round.begin.sql'
95100
'3':
96101
'name': 'constant.numeric.sql'
97102
'4':
103+
'name': 'punctuation.definition.parameters.bracket.round.end.sql'
104+
}
105+
{
106+
'match': '(?i)\\b(numeric|decimal)\\b(?:\\s*(\\()\\s*(\\d*)(?:\\s*(,)\\s*(\\d*))?\\s*(\\)))?'
107+
'captures':
108+
'1':
98109
'name': 'storage.type.sql'
110+
'2':
111+
'name': 'punctuation.definition.parameters.bracket.round.begin.sql'
112+
'3':
113+
'name': 'constant.numeric.sql'
114+
'4':
115+
'name': 'punctuation.separator.parameters.comma.sql'
99116
'5':
100117
'name': 'constant.numeric.sql'
101118
'6':
119+
'name': 'punctuation.definition.parameters.bracket.round.end.sql'
120+
}
121+
{
122+
'match': '(?i)\\b(time(?:stamp)?)\\b(?:\\s*(\\()\\s*(\\d*)\\s*(\\)))?(?:\\s*(with(?:out)?\\s+time\\s+zone)\\b)?'
123+
'captures':
124+
'1':
102125
'name': 'storage.type.sql'
103-
'7':
104-
'name': 'constant.numeric.sql'
105-
'8':
106-
'name': 'constant.numeric.sql'
107-
'9':
108-
'name': 'storage.type.sql'
109-
'10':
126+
'2':
127+
'name': 'punctuation.definition.parameters.bracket.round.begin.sql'
128+
'3':
110129
'name': 'constant.numeric.sql'
111-
'11':
130+
'4':
131+
'name': 'punctuation.definition.parameters.bracket.round.end.sql'
132+
'5':
112133
'name': 'storage.type.sql'
113-
'match': '''
114-
(?xi)
115-
# normal stuff, capture 1
116-
\\b(bigint|bigserial|bit|boolean|box|bytea|cidr|circle|date|datetime|datetime2|double\\sprecision|enum|inet|int|integer|line|lseg|macaddr|money|oid|path|point|polygon|real|float|serial|smallint|tinyint|sysdate|text|uniqueidentifier)\\b
117-
118-
# numeric suffix, capture 2 + 3i
119-
|\\b(bit\\svarying|character\\s(?:varying)?|tinyint|var\\schar|float|interval)\\((\\d+)\\)
120-
121-
# optional numeric suffix, capture 4 + 5i
122-
|\\b(char|number|n?varchar\\d?|time(?:stamp)?tz)\\b(?:\\s*\\(\\s*(\\d+)\\s*\\))?
123-
124-
# special case, capture 6 + 7i + 8i
125-
|\\b(numeric|decimal)\\b(?:\\((\\d+),(\\d+)\\))?
126-
127-
# special case, captures 9, 10i, 11
128-
|\\b(time(?:stamp)?)\\b(?:\\s*\\(\\s*(\\d+)\\s*\\))?(?:\\s*(with(?:out)?\\s+time\\s+zone\\b))?
129-
'''
130-
}
131-
{
132-
'match': '(?i:\\b((?:primary|foreign)\\s+key|references|on\\sdelete(\\s+cascade)?|check|constraint|unique|default)\\b)'
134+
}
135+
{
136+
'match': '(?i:\\b((?:primary|foreign)\\s+key|references|on\\s+delete(\\s+cascade)?|check|constraint|unique|default)\\b)'
133137
'name': 'storage.modifier.sql'
134138
}
135139
{
@@ -227,15 +231,20 @@
227231
'1':
228232
'name': 'constant.other.database-name.sql'
229233
'2':
234+
'name': 'punctuation.separator.period.sql'
235+
'3':
230236
'name': 'constant.other.table-name.sql'
231-
'match': '(\\w+?)\\.(\\w+)'
237+
'match': '(\\w+?)(\\.)(\\w+)'
232238
}
233239
{
234240
'include': '#strings'
235241
}
236242
{
237243
'include': '#regexps'
238244
}
245+
{
246+
'include': '#punctuation'
247+
}
239248
]
240249
'repository':
241250
'comments':
@@ -260,6 +269,40 @@
260269
'name': 'comment.block.sql'
261270
}
262271
]
272+
'punctuation':
273+
'patterns': [
274+
{
275+
'begin': '\\('
276+
'end': '\\)'
277+
'beginCaptures':
278+
'0':
279+
'name': 'punctuation.definition.section.bracket.round.begin.sql'
280+
'endCaptures':
281+
'0':
282+
'name': 'punctuation.definition.section.bracket.round.end.sql'
283+
'patterns': [
284+
{
285+
'include': '$self'
286+
}
287+
]
288+
}
289+
{
290+
'match': '\\)'
291+
'name': 'punctuation.unmatched.bracket.round.end.sql'
292+
}
293+
{
294+
'match': ','
295+
'name': 'punctuation.separator.comma.sql'
296+
}
297+
{
298+
'match': '\\.'
299+
'name': 'punctuation.separator.period.sql'
300+
}
301+
{
302+
'match': ';'
303+
'name': 'punctuation.terminator.statement.semicolon.sql'
304+
}
305+
]
263306
'regexps':
264307
'patterns': [
265308
{

spec/grammar-spec.coffee

+94-31
Original file line numberDiff line numberDiff line change
@@ -113,37 +113,63 @@ describe "SQL grammar", ->
113113
expect(tokens[1]).toEqual value: 'Test', scopes: ['source.sql', 'string.quoted.double.sql']
114114
expect(tokens[2]).toEqual value: '"', scopes: ['source.sql', 'string.quoted.double.sql', 'punctuation.definition.string.end.sql']
115115

116-
it 'tokenizes the time type', ->
117-
{tokens} = grammar.tokenizeLine('TIME')
118-
expect(tokens[0]).toEqual value: 'TIME', scopes: ['source.sql', 'storage.type.sql']
119-
120-
{tokens} = grammar.tokenizeLine('TIME WITH TIME ZONE')
121-
expect(tokens[0]).toEqual value: 'TIME', scopes: ['source.sql', 'storage.type.sql']
122-
expect(tokens[2]).toEqual value: 'WITH TIME ZONE', scopes: ['source.sql', 'storage.type.sql']
123-
124-
{tokens} = grammar.tokenizeLine('TIME(1)WITHOUT TIME ZONE\'23:00\'')
125-
expect(tokens[0]).toEqual value: 'TIME', scopes: ['source.sql', 'storage.type.sql']
126-
expect(tokens[2]).toEqual value: '1', scopes: ['source.sql', 'constant.numeric.sql']
127-
expect(tokens[4]).toEqual value: 'WITHOUT TIME ZONE', scopes: ['source.sql', 'storage.type.sql']
128-
129-
it 'tokenizes the timestamp type', ->
130-
{tokens} = grammar.tokenizeLine('TIMESTAMP ( 12 ) WITH TIME ZONE')
131-
expect(tokens[0]).toEqual value: 'TIMESTAMP', scopes: ['source.sql', 'storage.type.sql']
132-
expect(tokens[2]).toEqual value: '12', scopes: ['source.sql', 'constant.numeric.sql']
133-
expect(tokens[4]).toEqual value: 'WITH TIME ZONE', scopes: ['source.sql', 'storage.type.sql']
134-
135-
it 'tokenizes the timestamptz type', ->
136-
{tokens} = grammar.tokenizeLine('timestamptz')
137-
expect(tokens[0]).toEqual value: 'timestamptz', scopes: ['source.sql', 'storage.type.sql']
138-
139-
{tokens} = grammar.tokenizeLine('TIMESTAMPTZ(2)NOT NULL')
140-
expect(tokens[0]).toEqual value: 'TIMESTAMPTZ', scopes: ['source.sql', 'storage.type.sql']
141-
expect(tokens[2]).toEqual value: '2', scopes: ['source.sql', 'constant.numeric.sql']
142-
143-
it 'tokenizes the timetz type', ->
144-
{tokens} = grammar.tokenizeLine('timetz (2)')
145-
expect(tokens[0]).toEqual value: 'timetz', scopes: ['source.sql', 'storage.type.sql']
146-
expect(tokens[2]).toEqual value: '2', scopes: ['source.sql', 'constant.numeric.sql']
116+
it 'tokenizes storage types', ->
117+
lines = grammar.tokenizeLines('''
118+
datetime
119+
double precision
120+
integer
121+
''')
122+
expect(lines[0][0]).toEqual value: 'datetime', scopes: ['source.sql', 'storage.type.sql']
123+
expect(lines[1][0]).toEqual value: 'double precision', scopes: ['source.sql', 'storage.type.sql']
124+
expect(lines[2][0]).toEqual value: 'integer', scopes: ['source.sql', 'storage.type.sql']
125+
126+
it 'tokenizes storage types with an optional argument', ->
127+
lines = grammar.tokenizeLines('''
128+
bit varying
129+
int()
130+
timestamptz(1)
131+
''')
132+
expect(lines[0][0]).toEqual value: 'bit varying', scopes: ['source.sql', 'storage.type.sql']
133+
expect(lines[1][0]).toEqual value: 'int', scopes: ['source.sql', 'storage.type.sql']
134+
expect(lines[1][1]).toEqual value: '(', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.begin.sql']
135+
expect(lines[1][2]).toEqual value: ')', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.end.sql']
136+
expect(lines[2][0]).toEqual value: 'timestamptz', scopes: ['source.sql', 'storage.type.sql']
137+
expect(lines[2][1]).toEqual value: '(', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.begin.sql']
138+
expect(lines[2][2]).toEqual value: '1', scopes: ['source.sql', 'constant.numeric.sql']
139+
expect(lines[2][3]).toEqual value: ')', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.end.sql']
140+
141+
it 'tokenizes storage types with two optional arguments', ->
142+
lines = grammar.tokenizeLines('''
143+
decimal
144+
decimal(1)
145+
numeric(1,1)
146+
''')
147+
expect(lines[0][0]).toEqual value: 'decimal', scopes: ['source.sql', 'storage.type.sql']
148+
expect(lines[1][0]).toEqual value: 'decimal', scopes: ['source.sql', 'storage.type.sql']
149+
expect(lines[1][1]).toEqual value: '(', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.begin.sql']
150+
expect(lines[1][2]).toEqual value: '1', scopes: ['source.sql', 'constant.numeric.sql']
151+
expect(lines[1][3]).toEqual value: ')', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.end.sql']
152+
expect(lines[2][0]).toEqual value: 'numeric', scopes: ['source.sql', 'storage.type.sql']
153+
expect(lines[2][1]).toEqual value: '(', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.begin.sql']
154+
expect(lines[2][2]).toEqual value: '1', scopes: ['source.sql', 'constant.numeric.sql']
155+
expect(lines[2][3]).toEqual value: ',', scopes: ['source.sql', 'punctuation.separator.parameters.comma.sql']
156+
expect(lines[2][4]).toEqual value: '1', scopes: ['source.sql', 'constant.numeric.sql']
157+
expect(lines[2][5]).toEqual value: ')', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.end.sql']
158+
159+
it 'tokenizes storage types with time zones', ->
160+
lines = grammar.tokenizeLines('''
161+
time
162+
time(1) with time zone
163+
timestamp without time zone
164+
''')
165+
expect(lines[0][0]).toEqual value: 'time', scopes: ['source.sql', 'storage.type.sql']
166+
expect(lines[1][0]).toEqual value: 'time', scopes: ['source.sql', 'storage.type.sql']
167+
expect(lines[1][1]).toEqual value: '(', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.begin.sql']
168+
expect(lines[1][2]).toEqual value: '1', scopes: ['source.sql', 'constant.numeric.sql']
169+
expect(lines[1][3]).toEqual value: ')', scopes: ['source.sql', 'punctuation.definition.parameters.bracket.round.end.sql']
170+
expect(lines[1][5]).toEqual value: 'with time zone', scopes: ['source.sql', 'storage.type.sql']
171+
expect(lines[2][0]).toEqual value: 'timestamp', scopes: ['source.sql', 'storage.type.sql']
172+
expect(lines[2][2]).toEqual value: 'without time zone', scopes: ['source.sql', 'storage.type.sql']
147173

148174
it 'tokenizes comments', ->
149175
{tokens} = grammar.tokenizeLine('-- comment')
@@ -167,3 +193,40 @@ describe "SQL grammar", ->
167193
expect(tokens[3]).toEqual value: ' WITH ', scopes: ['source.sql', 'comment.block.sql']
168194
expect(tokens[4]).toEqual value: '*/', scopes: ['source.sql', 'comment.block.sql', 'punctuation.definition.comment.sql']
169195
expect(tokens[6]).toEqual value: 'AND', scopes: ['source.sql', 'keyword.other.DML.sql']
196+
197+
describe 'punctuation', ->
198+
it 'tokenizes parentheses', ->
199+
{tokens} = grammar.tokenizeLine('WHERE salary > (SELECT avg(salary) FROM employees)')
200+
expect(tokens[0]).toEqual value: 'WHERE', scopes: ['source.sql', 'keyword.other.DML.sql']
201+
expect(tokens[1]).toEqual value: ' salary ', scopes: ['source.sql']
202+
expect(tokens[2]).toEqual value: '>', scopes: ['source.sql', 'keyword.operator.comparison.sql']
203+
expect(tokens[4]).toEqual value: '(', scopes: ['source.sql', 'punctuation.definition.section.bracket.round.begin.sql']
204+
expect(tokens[5]).toEqual value: 'SELECT', scopes: ['source.sql', 'keyword.other.DML.sql']
205+
expect(tokens[7]).toEqual value: 'avg', scopes: ['source.sql', 'support.function.aggregate.sql']
206+
expect(tokens[8]).toEqual value: '(', scopes: ['source.sql', 'punctuation.definition.section.bracket.round.begin.sql']
207+
expect(tokens[9]).toEqual value: 'salary', scopes: ['source.sql']
208+
expect(tokens[10]).toEqual value: ')', scopes: ['source.sql', 'punctuation.definition.section.bracket.round.end.sql']
209+
expect(tokens[12]).toEqual value: 'FROM', scopes: ['source.sql', 'keyword.other.DML.sql']
210+
expect(tokens[13]).toEqual value: ' employees', scopes: ['source.sql']
211+
expect(tokens[14]).toEqual value: ')', scopes: ['source.sql', 'punctuation.definition.section.bracket.round.end.sql']
212+
213+
it 'tokenizes commas', ->
214+
{tokens} = grammar.tokenizeLine('name, year')
215+
expect(tokens[0]).toEqual value: 'name', scopes: ['source.sql']
216+
expect(tokens[1]).toEqual value: ',', scopes: ['source.sql', 'punctuation.separator.comma.sql']
217+
expect(tokens[2]).toEqual value: ' year', scopes: ['source.sql']
218+
219+
it 'tokenizes periods', ->
220+
{tokens} = grammar.tokenizeLine('.')
221+
expect(tokens[0]).toEqual value: '.', scopes: ['source.sql', 'punctuation.separator.period.sql']
222+
223+
{tokens} = grammar.tokenizeLine('database.table')
224+
expect(tokens[0]).toEqual value: 'database', scopes: ['source.sql', 'constant.other.database-name.sql']
225+
expect(tokens[1]).toEqual value: '.', scopes: ['source.sql', 'punctuation.separator.period.sql']
226+
expect(tokens[2]).toEqual value: 'table', scopes: ['source.sql', 'constant.other.table-name.sql']
227+
228+
it 'tokenizes semicolons', ->
229+
{tokens} = grammar.tokenizeLine('ORDER BY year;')
230+
expect(tokens[0]).toEqual value: 'ORDER BY', scopes: ['source.sql', 'keyword.other.DML.sql']
231+
expect(tokens[1]).toEqual value: ' year', scopes: ['source.sql']
232+
expect(tokens[2]).toEqual value: ';', scopes: ['source.sql', 'punctuation.terminator.statement.semicolon.sql']

0 commit comments

Comments (0)