Skip to content

Commit 2af3e2c

Browse files
authored Dec 22, 2020
Markdown: Improved URL tokenization (#2678)
It now tokenizes all parts of a URL except for the square brackets and parentheses.
1 parent df0738e commit 2af3e2c

File tree

6 files changed

+123
-183
lines changed

6 files changed

+123
-183
lines changed
 

‎components/prism-markdown.js

+13-7
Original file line numberDiff line numberDiff line change
@@ -221,21 +221,27 @@
221221
// [example](http://example.com "Optional title")
222222
// [example][id]
223223
// [example] [id]
224-
pattern: createInline(/!?\[(?:(?!\])<inner>)+\](?:\([^\s)]+(?:[\t ]+"(?:\\.|[^"\\])*")?\)| ?\[(?:(?!\])<inner>)+\])/.source),
224+
pattern: createInline(/!?\[(?:(?!\])<inner>)+\](?:\([^\s)]+(?:[\t ]+"(?:\\.|[^"\\])*")?\)|[ \t]?\[(?:(?!\])<inner>)+\])/.source),
225225
lookbehind: true,
226226
greedy: true,
227227
inside: {
228-
'variable': {
229-
pattern: /(\[)[^\]]+(?=\]$)/,
230-
lookbehind: true
231-
},
228+
'operator': /^!/,
232229
'content': {
233-
pattern: /(^!?\[)[^\]]+(?=\])/,
230+
pattern: /(^\[)[^\]]+(?=\])/,
234231
lookbehind: true,
235232
inside: {} // see below
236233
},
234+
'variable': {
235+
pattern: /(^\][ \t]?\[)[^\]]+(?=\]$)/,
236+
lookbehind: true
237+
},
238+
'url': {
239+
pattern: /(^\]\()[^\s)]+/,
240+
lookbehind: true
241+
},
237242
'string': {
238-
pattern: /"(?:\\.|[^"\\])*"(?=\)$)/
243+
pattern: /(^[ \t]+)"(?:\\.|[^"\\])*"(?=\)$)/,
244+
lookbehind: true
239245
}
240246
}
241247
}

‎components/prism-markdown.min.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎tests/languages/markdown/bold_feature.test

+23-44
Original file line numberDiff line numberDiff line change
@@ -24,41 +24,32 @@ not__bold__ __this__either
2424
[
2525
["bold", [
2626
["punctuation", "**"],
27-
["content", [
28-
"foobar"
29-
]],
27+
["content", ["foobar"]],
3028
["punctuation", "**"]
3129
]],
3230
["bold", [
3331
["punctuation", "**"],
34-
["content", [
35-
"foo\r\nbar"
36-
]],
32+
["content", ["foo\r\nbar"]],
3733
["punctuation", "**"]
3834
]],
3935
["bold", [
4036
["punctuation", "__"],
41-
["content", [
42-
"foobar"
43-
]],
37+
["content", ["foobar"]],
4438
["punctuation", "__"]
4539
]],
4640
["bold", [
4741
["punctuation", "__"],
48-
["content", [
49-
"foo\r\nbar"
50-
]],
42+
["content", ["foo\r\nbar"]],
5143
["punctuation", "__"]
5244
]],
45+
5346
["bold", [
5447
["punctuation", "__"],
5548
["content", [
5649
"foo ",
5750
["italic", [
5851
["punctuation", "*"],
59-
["content", [
60-
"bar"
61-
]],
52+
["content", ["bar"]],
6253
["punctuation", "*"]
6354
]],
6455
" baz"
@@ -71,9 +62,7 @@ not__bold__ __this__either
7162
"foo ",
7263
["italic", [
7364
["punctuation", "_"],
74-
["content", [
75-
"bar"
76-
]],
65+
["content", ["bar"]],
7766
["punctuation", "_"]
7867
]],
7968
" baz"
@@ -86,9 +75,7 @@ not__bold__ __this__either
8675
"foo ",
8776
["strike", [
8877
["punctuation", "~"],
89-
["content", [
90-
"bar"
91-
]],
78+
["content", ["bar"]],
9279
["punctuation", "~"]
9380
]],
9481
" baz"
@@ -101,9 +88,7 @@ not__bold__ __this__either
10188
"foo ",
10289
["strike", [
10390
["punctuation", "~~"],
104-
["content", [
105-
"bar"
106-
]],
91+
["content", ["bar"]],
10792
["punctuation", "~~"]
10893
]],
10994
" baz"
@@ -116,23 +101,22 @@ not__bold__ __this__either
116101
"foo",
117102
["url", [
118103
"[",
119-
["content", [
120-
"bar"
121-
]],
122-
"](baz)"
104+
["content", ["bar"]],
105+
"](",
106+
["url", "baz"],
107+
")"
123108
]]
124109
]],
125110
["punctuation", "__"]
126111
]],
112+
127113
["bold", [
128114
["punctuation", "**"],
129115
["content", [
130116
"foo ",
131117
["italic", [
132118
["punctuation", "*"],
133-
["content", [
134-
"bar"
135-
]],
119+
["content", ["bar"]],
136120
["punctuation", "*"]
137121
]],
138122
" baz"
@@ -145,9 +129,7 @@ not__bold__ __this__either
145129
"foo ",
146130
["italic", [
147131
["punctuation", "_"],
148-
["content", [
149-
"bar"
150-
]],
132+
["content", ["bar"]],
151133
["punctuation", "_"]
152134
]],
153135
" baz"
@@ -160,9 +142,7 @@ not__bold__ __this__either
160142
"foo ",
161143
["strike", [
162144
["punctuation", "~"],
163-
["content", [
164-
"bar"
165-
]],
145+
["content", ["bar"]],
166146
["punctuation", "~"]
167147
]],
168148
" baz"
@@ -175,9 +155,7 @@ not__bold__ __this__either
175155
"foo ",
176156
["strike", [
177157
["punctuation", "~~"],
178-
["content", [
179-
"bar"
180-
]],
158+
["content", ["bar"]],
181159
["punctuation", "~~"]
182160
]],
183161
" baz"
@@ -190,14 +168,15 @@ not__bold__ __this__either
190168
"foo",
191169
["url", [
192170
"[",
193-
["content", [
194-
"bar"
195-
]],
196-
"](baz)"
171+
["content", ["bar"]],
172+
"](",
173+
["url", "baz"],
174+
")"
197175
]]
198176
]],
199177
["punctuation", "**"]
200178
]],
179+
201180
"\r\n\r\nnot__bold__ __this__either"
202181
]
203182

‎tests/languages/markdown/italic_feature.test

+23-44
Original file line numberDiff line numberDiff line change
@@ -24,41 +24,32 @@ not_italic_ _this_either
2424
[
2525
["italic", [
2626
["punctuation", "*"],
27-
["content", [
28-
"foobar"
29-
]],
27+
["content", ["foobar"]],
3028
["punctuation", "*"]
3129
]],
3230
["italic", [
3331
["punctuation", "*"],
34-
["content", [
35-
"foo\r\nbar"
36-
]],
32+
["content", ["foo\r\nbar"]],
3733
["punctuation", "*"]
3834
]],
3935
["italic", [
4036
["punctuation", "_"],
41-
["content", [
42-
"foobar"
43-
]],
37+
["content", ["foobar"]],
4438
["punctuation", "_"]
4539
]],
4640
["italic", [
4741
["punctuation", "_"],
48-
["content", [
49-
"foo\r\nbar"
50-
]],
42+
["content", ["foo\r\nbar"]],
5143
["punctuation", "_"]
5244
]],
45+
5346
["italic", [
5447
["punctuation", "_"],
5548
["content", [
5649
"foo ",
5750
["bold", [
5851
["punctuation", "__"],
59-
["content", [
60-
"bar"
61-
]],
52+
["content", ["bar"]],
6253
["punctuation", "__"]
6354
]],
6455
" baz"
@@ -71,9 +62,7 @@ not_italic_ _this_either
7162
"foo ",
7263
["bold", [
7364
["punctuation", "**"],
74-
["content", [
75-
"bar"
76-
]],
65+
["content", ["bar"]],
7766
["punctuation", "**"]
7867
]],
7968
" baz"
@@ -86,9 +75,7 @@ not_italic_ _this_either
8675
"foo ",
8776
["strike", [
8877
["punctuation", "~"],
89-
["content", [
90-
"bar"
91-
]],
78+
["content", ["bar"]],
9279
["punctuation", "~"]
9380
]],
9481
" baz"
@@ -101,9 +88,7 @@ not_italic_ _this_either
10188
"foo ",
10289
["strike", [
10390
["punctuation", "~~"],
104-
["content", [
105-
"bar"
106-
]],
91+
["content", ["bar"]],
10792
["punctuation", "~~"]
10893
]],
10994
" baz"
@@ -116,23 +101,22 @@ not_italic_ _this_either
116101
"foo",
117102
["url", [
118103
"[",
119-
["content", [
120-
"bar"
121-
]],
122-
"](baz)"
104+
["content", ["bar"]],
105+
"](",
106+
["url", "baz"],
107+
")"
123108
]]
124109
]],
125110
["punctuation", "_"]
126111
]],
112+
127113
["italic", [
128114
["punctuation", "*"],
129115
["content", [
130116
"foo ",
131117
["bold", [
132118
["punctuation", "__"],
133-
["content", [
134-
"bar"
135-
]],
119+
["content", ["bar"]],
136120
["punctuation", "__"]
137121
]],
138122
" baz"
@@ -145,9 +129,7 @@ not_italic_ _this_either
145129
"foo ",
146130
["bold", [
147131
["punctuation", "**"],
148-
["content", [
149-
"bar"
150-
]],
132+
["content", ["bar"]],
151133
["punctuation", "**"]
152134
]],
153135
" baz"
@@ -160,9 +142,7 @@ not_italic_ _this_either
160142
"foo ",
161143
["strike", [
162144
["punctuation", "~"],
163-
["content", [
164-
"bar"
165-
]],
145+
["content", ["bar"]],
166146
["punctuation", "~"]
167147
]],
168148
" baz"
@@ -175,9 +155,7 @@ not_italic_ _this_either
175155
"foo ",
176156
["strike", [
177157
["punctuation", "~~"],
178-
["content", [
179-
"bar"
180-
]],
158+
["content", ["bar"]],
181159
["punctuation", "~~"]
182160
]],
183161
" baz"
@@ -190,14 +168,15 @@ not_italic_ _this_either
190168
"foo",
191169
["url", [
192170
"[",
193-
["content", [
194-
"bar"
195-
]],
196-
"](baz)"
171+
["content", ["bar"]],
172+
"](",
173+
["url", "baz"],
174+
")"
197175
]]
198176
]],
199177
["punctuation", "*"]
200178
]],
179+
201180
"\r\n\r\nnot_italic_ _this_either"
202181
]
203182

‎tests/languages/markdown/strike_feature.test

+22-44
Original file line numberDiff line numberDiff line change
@@ -22,41 +22,32 @@ bar~
2222
[
2323
["strike", [
2424
["punctuation", "~~"],
25-
["content", [
26-
"foobar"
27-
]],
25+
["content", ["foobar"]],
2826
["punctuation", "~~"]
2927
]],
3028
["strike", [
3129
["punctuation", "~~"],
32-
["content", [
33-
"foo\r\nbar"
34-
]],
30+
["content", ["foo\r\nbar"]],
3531
["punctuation", "~~"]
3632
]],
3733
["strike", [
3834
["punctuation", "~"],
39-
["content", [
40-
"foobar"
41-
]],
35+
["content", ["foobar"]],
4236
["punctuation", "~"]
4337
]],
4438
["strike", [
4539
["punctuation", "~"],
46-
["content", [
47-
"foo\r\nbar"
48-
]],
40+
["content", ["foo\r\nbar"]],
4941
["punctuation", "~"]
5042
]],
43+
5144
["strike", [
5245
["punctuation", "~"],
5346
["content", [
5447
"foo ",
5548
["italic", [
5649
["punctuation", "*"],
57-
["content", [
58-
"bar"
59-
]],
50+
["content", ["bar"]],
6051
["punctuation", "*"]
6152
]],
6253
" baz"
@@ -69,9 +60,7 @@ bar~
6960
"foo ",
7061
["italic", [
7162
["punctuation", "_"],
72-
["content", [
73-
"bar"
74-
]],
63+
["content", ["bar"]],
7564
["punctuation", "_"]
7665
]],
7766
" baz"
@@ -84,9 +73,7 @@ bar~
8473
"foo ",
8574
["bold", [
8675
["punctuation", "**"],
87-
["content", [
88-
"bar"
89-
]],
76+
["content", ["bar"]],
9077
["punctuation", "**"]
9178
]],
9279
" baz"
@@ -99,9 +86,7 @@ bar~
9986
"foo ",
10087
["bold", [
10188
["punctuation", "__"],
102-
["content", [
103-
"bar"
104-
]],
89+
["content", ["bar"]],
10590
["punctuation", "__"]
10691
]],
10792
" baz"
@@ -114,23 +99,22 @@ bar~
11499
"foo",
115100
["url", [
116101
"[",
117-
["content", [
118-
"bar"
119-
]],
120-
"](baz)"
102+
["content", ["bar"]],
103+
"](",
104+
["url", "baz"],
105+
")"
121106
]]
122107
]],
123108
["punctuation", "~"]
124109
]],
110+
125111
["strike", [
126112
["punctuation", "~~"],
127113
["content", [
128114
"foo ",
129115
["italic", [
130116
["punctuation", "*"],
131-
["content", [
132-
"bar"
133-
]],
117+
["content", ["bar"]],
134118
["punctuation", "*"]
135119
]],
136120
" baz"
@@ -143,9 +127,7 @@ bar~
143127
"foo ",
144128
["italic", [
145129
["punctuation", "_"],
146-
["content", [
147-
"bar"
148-
]],
130+
["content", ["bar"]],
149131
["punctuation", "_"]
150132
]],
151133
" baz"
@@ -158,9 +140,7 @@ bar~
158140
"foo ",
159141
["bold", [
160142
["punctuation", "**"],
161-
["content", [
162-
"bar"
163-
]],
143+
["content", ["bar"]],
164144
["punctuation", "**"]
165145
]],
166146
" baz"
@@ -173,9 +153,7 @@ bar~
173153
"foo ",
174154
["bold", [
175155
["punctuation", "__"],
176-
["content", [
177-
"bar"
178-
]],
156+
["content", ["bar"]],
179157
["punctuation", "__"]
180158
]],
181159
" baz"
@@ -188,10 +166,10 @@ bar~
188166
"foo",
189167
["url", [
190168
"[",
191-
["content", [
192-
"bar"
193-
]],
194-
"](baz)"
169+
["content", ["bar"]],
170+
"](",
171+
["url", "baz"],
172+
")"
195173
]]
196174
]],
197175
["punctuation", "~~"]

‎tests/languages/markdown/url_feature.test

+41-43
Original file line numberDiff line numberDiff line change
@@ -18,134 +18,132 @@ bar](http://prismjs.com)
1818
[
1919
["url", [
2020
"[",
21-
["content", [
22-
"foo"
23-
]],
24-
"](http://prismjs.com)"
21+
["content", ["foo"]],
22+
"](",
23+
["url", "http://prismjs.com"],
24+
")"
2525
]],
2626
["url", [
27-
"![",
28-
["content", [
29-
"foo"
30-
]],
31-
"](http://prismjs.com ",
27+
["operator", "!"],
28+
"[",
29+
["content", ["foo"]],
30+
"](",
31+
["url", "http://prismjs.com"],
3232
["string", "\"Foo\\\"bar\""],
3333
")"
3434
]],
3535
["url", [
3636
"[",
37-
["content", [
38-
"foo"
39-
]],
37+
["content", ["foo"]],
4038
"][",
4139
["variable", "bar"],
4240
"]"
4341
]],
4442
["url", [
4543
"[",
46-
["content", [
47-
"foo"
48-
]],
44+
["content", ["foo"]],
4945
"] [",
5046
["variable", "bar"],
5147
"]"
5248
]],
49+
5350
["url", [
5451
"[",
55-
["content", [
56-
"foo\r\nbar"
57-
]],
58-
"](http://prismjs.com)"
52+
["content", ["foo\r\nbar"]],
53+
"](",
54+
["url", "http://prismjs.com"],
55+
")"
5956
]],
57+
6058
["url", [
6159
"[",
6260
["content", [
6361
"foo ",
6462
["italic", [
6563
["punctuation", "*"],
66-
["content", [
67-
"bar"
68-
]],
64+
["content", ["bar"]],
6965
["punctuation", "*"]
7066
]],
7167
" baz"
7268
]],
73-
"](http://prismjs.com)"
69+
"](",
70+
["url", "http://prismjs.com"],
71+
")"
7472
]],
7573
["url", [
7674
"[",
7775
["content", [
7876
"foo ",
7977
["italic", [
8078
["punctuation", "_"],
81-
["content", [
82-
"bar"
83-
]],
79+
["content", ["bar"]],
8480
["punctuation", "_"]
8581
]],
8682
" baz"
8783
]],
88-
"](http://prismjs.com)"
84+
"](",
85+
["url", "http://prismjs.com"],
86+
")"
8987
]],
9088
["url", [
9189
"[",
9290
["content", [
9391
"foo ",
9492
["bold", [
9593
["punctuation", "**"],
96-
["content", [
97-
"bar"
98-
]],
94+
["content", ["bar"]],
9995
["punctuation", "**"]
10096
]],
10197
" baz"
10298
]],
103-
"](http://prismjs.com)"
99+
"](",
100+
["url", "http://prismjs.com"],
101+
")"
104102
]],
105103
["url", [
106104
"[",
107105
["content", [
108106
"foo ",
109107
["bold", [
110108
["punctuation", "__"],
111-
["content", [
112-
"bar"
113-
]],
109+
["content", ["bar"]],
114110
["punctuation", "__"]
115111
]],
116112
" baz"
117113
]],
118-
"](http://prismjs.com)"
114+
"](",
115+
["url", "http://prismjs.com"],
116+
")"
119117
]],
120118
["url", [
121119
"[",
122120
["content", [
123121
"foo ",
124122
["strike", [
125123
["punctuation", "~"],
126-
["content", [
127-
"bar"
128-
]],
124+
["content", ["bar"]],
129125
["punctuation", "~"]
130126
]],
131127
" baz"
132128
]],
133-
"](http://prismjs.com)"
129+
"](",
130+
["url", "http://prismjs.com"],
131+
")"
134132
]],
135133
["url", [
136134
"[",
137135
["content", [
138136
"foo ",
139137
["strike", [
140138
["punctuation", "~~"],
141-
["content", [
142-
"bar"
143-
]],
139+
["content", ["bar"]],
144140
["punctuation", "~~"]
145141
]],
146142
" baz"
147143
]],
148-
"](http://prismjs.com)"
144+
"](",
145+
["url", "http://prismjs.com"],
146+
")"
149147
]]
150148
]
151149

0 commit comments

Comments
 (0)
Please sign in to comment.