@@ -94,6 +94,34 @@ fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
94
94
if current_pos == end_pos { None } else { Some ( current_pos) }
95
95
}
96
96
97
+ fn extract_path_forward ( text : & str , start_pos : usize ) -> Option < usize > {
98
+ use rustc_lexer:: { is_id_continue, is_id_start} ;
99
+ let mut current_pos = start_pos;
100
+ loop {
101
+ if current_pos < text. len ( ) && text[ current_pos..] . starts_with ( "::" ) {
102
+ current_pos += 2 ;
103
+ } else {
104
+ break ;
105
+ }
106
+ let mut chars = text[ current_pos..] . chars ( ) ;
107
+ if let Some ( c) = chars. next ( ) {
108
+ if is_id_start ( c) {
109
+ current_pos += c. len_utf8 ( ) ;
110
+ } else {
111
+ break ;
112
+ }
113
+ }
114
+ while let Some ( c) = chars. next ( ) {
115
+ if is_id_continue ( c) {
116
+ current_pos += c. len_utf8 ( ) ;
117
+ } else {
118
+ break ;
119
+ }
120
+ }
121
+ }
122
+ if current_pos == start_pos { None } else { Some ( current_pos) }
123
+ }
124
+
97
125
fn is_valid_for_html_tag_name ( c : char , is_empty : bool ) -> bool {
98
126
// https://spec.commonmark.org/0.30/#raw-html
99
127
//
@@ -218,19 +246,68 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
218
246
// If a tag looks like `<this>`, it might actually be a generic.
219
247
// We don't try to detect stuff `<like, this>` because that's not valid HTML,
220
248
// and we don't try to detect stuff `<like this>` because that's not valid Rust.
221
- if let Some ( Some ( generics_start) ) = ( is_open_tag
222
- && dox[ ..range. end ] . ends_with ( '>' ) )
249
+ let mut generics_end = range. end ;
250
+ if let Some ( Some ( mut generics_start) ) = ( is_open_tag
251
+ && dox[ ..generics_end] . ends_with ( '>' ) )
223
252
. then ( || extract_path_backwards ( & dox, range. start ) )
224
253
{
254
+ while generics_start != 0
255
+ && generics_end < dox. len ( )
256
+ && dox. as_bytes ( ) [ generics_start - 1 ] == b'<'
257
+ && dox. as_bytes ( ) [ generics_end] == b'>'
258
+ {
259
+ generics_end += 1 ;
260
+ generics_start -= 1 ;
261
+ if let Some ( new_start) = extract_path_backwards ( & dox, generics_start) {
262
+ generics_start = new_start;
263
+ }
264
+ if let Some ( new_end) = extract_path_forward ( & dox, generics_end) {
265
+ generics_end = new_end;
266
+ }
267
+ }
268
+ if let Some ( new_end) = extract_path_forward ( & dox, generics_end) {
269
+ generics_end = new_end;
270
+ }
225
271
let generics_sp = match super :: source_span_for_markdown_range (
226
272
tcx,
227
273
& dox,
228
- & ( generics_start..range . end ) ,
274
+ & ( generics_start..generics_end ) ,
229
275
& item. attrs ,
230
276
) {
231
277
Some ( sp) => sp,
232
278
None => item. attr_span ( tcx) ,
233
279
} ;
280
+ // Sometimes, we only extract part of a path. For example, consider this:
281
+ //
282
+ // <[u32] as IntoIter<u32>>::Item
283
+ // ^^^^^ unclosed HTML tag `u32`
284
+ //
285
+ // We don't have any code for parsing fully-qualified trait paths.
286
+ // In theory, we could add it, but doing it correctly would require
287
+ // parsing the entire path grammar, which is problematic because of
288
+ // overlap between the path grammar and Markdown.
289
+ //
290
+ // The example above shows that ambiguity. Is `[u32]` intended to be an
291
+ // intra-doc link to the u32 primitive, or is it intended to be a slice?
292
+ //
293
+ // If the below conditional were removed, we would suggest this, which is
294
+ // not what the user probably wants.
295
+ //
296
+ // <[u32] as `IntoIter<u32>`>::Item
297
+ //
298
+ // We know that the user actually wants to wrap the whole thing in a code
299
+ // block, but the only reason we know that is because `u32` does not, in
300
+ // fact, implement IntoIter. If the example looks like this:
301
+ //
302
+ // <[Vec<i32>] as IntoIter<i32>>::Item
303
+ //
304
+ // The ideal fix would be significantly different.
305
+ if ( generics_start > 0 && dox. as_bytes ( ) [ generics_start - 1 ] == b'<' )
306
+ || ( generics_end < dox. len ( ) && dox. as_bytes ( ) [ generics_end] == b'>' )
307
+ {
308
+ diag. emit ( ) ;
309
+ return ;
310
+ }
234
311
// multipart form is chosen here because ``Vec<i32>`` would be confusing.
235
312
diag. multipart_suggestion (
236
313
"try marking as source code" ,
0 commit comments