Skip to content

Commit 05a267f

Browse files
Rollup merge of #101731 - notriddle:notriddle/more-improved-html-check, r=GuillaumeGomez
rustdoc: improve rustdoc HTML suggestions handling of nested generics Based on some poor suggestions produced when stablizing this lint and running it on `manformed-generics.rs` in #101720
2 parents bbe23e8 + 84ca399 commit 05a267f

6 files changed

+330
-13
lines changed

src/librustdoc/passes/html_tags.rs

+80-3
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,34 @@ fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
9494
if current_pos == end_pos { None } else { Some(current_pos) }
9595
}
9696

97+
fn extract_path_forward(text: &str, start_pos: usize) -> Option<usize> {
98+
use rustc_lexer::{is_id_continue, is_id_start};
99+
let mut current_pos = start_pos;
100+
loop {
101+
if current_pos < text.len() && text[current_pos..].starts_with("::") {
102+
current_pos += 2;
103+
} else {
104+
break;
105+
}
106+
let mut chars = text[current_pos..].chars();
107+
if let Some(c) = chars.next() {
108+
if is_id_start(c) {
109+
current_pos += c.len_utf8();
110+
} else {
111+
break;
112+
}
113+
}
114+
while let Some(c) = chars.next() {
115+
if is_id_continue(c) {
116+
current_pos += c.len_utf8();
117+
} else {
118+
break;
119+
}
120+
}
121+
}
122+
if current_pos == start_pos { None } else { Some(current_pos) }
123+
}
124+
97125
fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
98126
// https://spec.commonmark.org/0.30/#raw-html
99127
//
@@ -218,19 +246,68 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
218246
// If a tag looks like `<this>`, it might actually be a generic.
219247
// We don't try to detect stuff `<like, this>` because that's not valid HTML,
220248
// and we don't try to detect stuff `<like this>` because that's not valid Rust.
221-
if let Some(Some(generics_start)) = (is_open_tag
222-
&& dox[..range.end].ends_with('>'))
249+
let mut generics_end = range.end;
250+
if let Some(Some(mut generics_start)) = (is_open_tag
251+
&& dox[..generics_end].ends_with('>'))
223252
.then(|| extract_path_backwards(&dox, range.start))
224253
{
254+
while generics_start != 0
255+
&& generics_end < dox.len()
256+
&& dox.as_bytes()[generics_start - 1] == b'<'
257+
&& dox.as_bytes()[generics_end] == b'>'
258+
{
259+
generics_end += 1;
260+
generics_start -= 1;
261+
if let Some(new_start) = extract_path_backwards(&dox, generics_start) {
262+
generics_start = new_start;
263+
}
264+
if let Some(new_end) = extract_path_forward(&dox, generics_end) {
265+
generics_end = new_end;
266+
}
267+
}
268+
if let Some(new_end) = extract_path_forward(&dox, generics_end) {
269+
generics_end = new_end;
270+
}
225271
let generics_sp = match super::source_span_for_markdown_range(
226272
tcx,
227273
&dox,
228-
&(generics_start..range.end),
274+
&(generics_start..generics_end),
229275
&item.attrs,
230276
) {
231277
Some(sp) => sp,
232278
None => item.attr_span(tcx),
233279
};
280+
// Sometimes, we only extract part of a path. For example, consider this:
281+
//
282+
// <[u32] as IntoIter<u32>>::Item
283+
// ^^^^^ unclosed HTML tag `u32`
284+
//
285+
// We don't have any code for parsing fully-qualified trait paths.
286+
// In theory, we could add it, but doing it correctly would require
287+
// parsing the entire path grammar, which is problematic because of
288+
// overlap between the path grammar and Markdown.
289+
//
290+
// The example above shows that ambiguity. Is `[u32]` intended to be an
291+
// intra-doc link to the u32 primitive, or is it intended to be a slice?
292+
//
293+
// If the below conditional were removed, we would suggest this, which is
294+
// not what the user probably wants.
295+
//
296+
// <[u32] as `IntoIter<u32>`>::Item
297+
//
298+
// We know that the user actually wants to wrap the whole thing in a code
299+
// block, but the only reason we know that is because `u32` does not, in
300+
// fact, implement IntoIter. If the example looks like this:
301+
//
302+
// <[Vec<i32>] as IntoIter<i32>::Item
303+
//
304+
// The ideal fix would be significantly different.
305+
if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<')
306+
|| (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>')
307+
{
308+
diag.emit();
309+
return;
310+
}
234311
// multipart form is chosen here because ``Vec<i32>`` would be confusing.
235312
diag.multipart_suggestion(
236313
"try marking as source code",

src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.rs

+42
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,48 @@ pub struct ConstGeneric;
88
// HTML tags cannot contain commas, so no error.
99
pub struct MultipleGenerics;
1010

11+
/// This <[u32] as Iterator<Item>> thing!
12+
//~^ERROR unclosed HTML tag `Item`
13+
// Some forms of fully-qualified path are simultaneously valid HTML tags
14+
// with attributes. They produce an error, but no suggestion, because figuring
15+
// out if this is valid would require parsing the entire path grammar.
16+
//
17+
// The important part is that we don't produce any *wrong* suggestions.
18+
// While several other examples below are added to make sure we don't
19+
// produce suggestions when given complex paths, this example is the actual
20+
// reason behind not just using the real path parser. It's ambiguous: there's
21+
// no way to locally reason out whether that `[u32]` is intended to be a slice
22+
// or an intra-doc link.
23+
pub struct FullyQualifiedPathsDoNotCount;
24+
25+
/// This <Vec as IntoIter>::Iter thing!
26+
//~^ERROR unclosed HTML tag `Vec`
27+
// Some forms of fully-qualified path are simultaneously valid HTML tags
28+
// with attributes. They produce an error, but no suggestion, because figuring
29+
// out if this is valid would require parsing the entire path grammar.
30+
pub struct FullyQualifiedPathsDoNotCount1;
31+
32+
/// This Vec<Vec as IntoIter>::Iter thing!
33+
//~^ERROR unclosed HTML tag `Vec`
34+
// Some forms of fully-qualified path are simultaneously valid HTML tags
35+
// with attributes. They produce an error, but no suggestion, because figuring
36+
// out if this is valid would require parsing the entire path grammar.
37+
pub struct FullyQualifiedPathsDoNotCount2;
38+
39+
/// This Vec<Vec as IntoIter> thing!
40+
//~^ERROR unclosed HTML tag `Vec`
41+
// Some forms of fully-qualified path are simultaneously valid HTML tags
42+
// with attributes. They produce an error, but no suggestion, because figuring
43+
// out if this is valid would require parsing the entire path grammar.
44+
pub struct FullyQualifiedPathsDoNotCount3;
45+
46+
/// This Vec<Vec<i32> as IntoIter> thing!
47+
//~^ERROR unclosed HTML tag `i32`
48+
// Some forms of fully-qualified path are simultaneously valid HTML tags
49+
// with attributes. They produce an error, but no suggestion, because figuring
50+
// out if this is valid would require parsing the entire path grammar.
51+
pub struct FullyQualifiedPathsDoNotCount4;
52+
1153
/// This Vec<i32 class="test"> thing!
1254
//~^ERROR unclosed HTML tag `i32`
1355
// HTML attributes shouldn't be treated as Rust syntax, so no suggestions.
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,68 @@
1-
error: unclosed HTML tag `i32`
2-
--> $DIR/html-as-generics-no-suggestions.rs:11:13
1+
error: unclosed HTML tag `Item`
2+
--> $DIR/html-as-generics-no-suggestions.rs:11:28
33
|
4-
LL | /// This Vec<i32 class="test"> thing!
5-
| ^^^^
4+
LL | /// This <[u32] as Iterator<Item>> thing!
5+
| ^^^^^^
66
|
77
note: the lint level is defined here
88
--> $DIR/html-as-generics-no-suggestions.rs:1:9
99
|
1010
LL | #![deny(rustdoc::invalid_html_tags)]
1111
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
1212

13+
error: unclosed HTML tag `Vec`
14+
--> $DIR/html-as-generics-no-suggestions.rs:25:10
15+
|
16+
LL | /// This <Vec as IntoIter>::Iter thing!
17+
| ^^^^
18+
19+
error: unclosed HTML tag `Vec`
20+
--> $DIR/html-as-generics-no-suggestions.rs:32:13
21+
|
22+
LL | /// This Vec<Vec as IntoIter>::Iter thing!
23+
| ^^^^
24+
25+
error: unclosed HTML tag `Vec`
26+
--> $DIR/html-as-generics-no-suggestions.rs:39:13
27+
|
28+
LL | /// This Vec<Vec as IntoIter> thing!
29+
| ^^^^
30+
31+
error: unclosed HTML tag `i32`
32+
--> $DIR/html-as-generics-no-suggestions.rs:46:17
33+
|
34+
LL | /// This Vec<Vec<i32> as IntoIter> thing!
35+
| ^^^^^
36+
37+
error: unclosed HTML tag `i32`
38+
--> $DIR/html-as-generics-no-suggestions.rs:53:13
39+
|
40+
LL | /// This Vec<i32 class="test"> thing!
41+
| ^^^^
42+
1343
error: unopened HTML tag `i32`
14-
--> $DIR/html-as-generics-no-suggestions.rs:20:13
44+
--> $DIR/html-as-generics-no-suggestions.rs:62:13
1545
|
1646
LL | /// This Vec</i32> thing!
1747
| ^^^^^^
1848

1949
error: unclosed HTML tag `i32`
20-
--> $DIR/html-as-generics-no-suggestions.rs:25:13
50+
--> $DIR/html-as-generics-no-suggestions.rs:67:13
2151
|
2252
LL | /// This 123<i32> thing!
2353
| ^^^^^
2454

2555
error: unclosed HTML tag `i32`
26-
--> $DIR/html-as-generics-no-suggestions.rs:30:14
56+
--> $DIR/html-as-generics-no-suggestions.rs:72:14
2757
|
2858
LL | /// This Vec:<i32> thing!
2959
| ^^^^^
3060

3161
error: unclosed HTML tag `i32`
32-
--> $DIR/html-as-generics-no-suggestions.rs:35:39
62+
--> $DIR/html-as-generics-no-suggestions.rs:77:39
3363
|
3464
LL | /// This [link](https://rust-lang.org)<i32> thing!
3565
| ^^^^^
3666

37-
error: aborting due to 5 previous errors
67+
error: aborting due to 10 previous errors
3868

src/test/rustdoc-ui/suggestions/html-as-generics.fixed

+40
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,43 @@ pub struct BareTurbofish;
3030
//~^ERROR unclosed HTML tag `i32`
3131
//~|HELP try marking as source
3232
pub struct Nested;
33+
34+
/// Nested generics `Vec<Vec<u32>>`
35+
//~^ ERROR unclosed HTML tag `u32`
36+
//~|HELP try marking as source
37+
pub struct NestedGenerics;
38+
39+
/// Generics with path `Vec<i32>::Iter`
40+
//~^ ERROR unclosed HTML tag `i32`
41+
//~|HELP try marking as source
42+
pub struct GenericsWithPath;
43+
44+
/// Generics with path `<Vec<i32>>::Iter`
45+
//~^ ERROR unclosed HTML tag `i32`
46+
//~|HELP try marking as source
47+
pub struct NestedGenericsWithPath;
48+
49+
/// Generics with path `Vec<Vec<i32>>::Iter`
50+
//~^ ERROR unclosed HTML tag `i32`
51+
//~|HELP try marking as source
52+
pub struct NestedGenericsWithPath2;
53+
54+
/// Generics with bump `<Vec<i32>>`s
55+
//~^ ERROR unclosed HTML tag `i32`
56+
//~|HELP try marking as source
57+
pub struct NestedGenericsWithBump;
58+
59+
/// Generics with bump `Vec<Vec<i32>>`s
60+
//~^ ERROR unclosed HTML tag `i32`
61+
//~|HELP try marking as source
62+
pub struct NestedGenericsWithBump2;
63+
64+
/// Generics with punct `<Vec<i32>>`!
65+
//~^ ERROR unclosed HTML tag `i32`
66+
//~|HELP try marking as source
67+
pub struct NestedGenericsWithPunct;
68+
69+
/// Generics with punct `Vec<Vec<i32>>`!
70+
//~^ ERROR unclosed HTML tag `i32`
71+
//~|HELP try marking as source
72+
pub struct NestedGenericsWithPunct2;

src/test/rustdoc-ui/suggestions/html-as-generics.rs

+40
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,43 @@ pub struct BareTurbofish;
3030
//~^ERROR unclosed HTML tag `i32`
3131
//~|HELP try marking as source
3232
pub struct Nested;
33+
34+
/// Nested generics Vec<Vec<u32>>
35+
//~^ ERROR unclosed HTML tag `u32`
36+
//~|HELP try marking as source
37+
pub struct NestedGenerics;
38+
39+
/// Generics with path Vec<i32>::Iter
40+
//~^ ERROR unclosed HTML tag `i32`
41+
//~|HELP try marking as source
42+
pub struct GenericsWithPath;
43+
44+
/// Generics with path <Vec<i32>>::Iter
45+
//~^ ERROR unclosed HTML tag `i32`
46+
//~|HELP try marking as source
47+
pub struct NestedGenericsWithPath;
48+
49+
/// Generics with path Vec<Vec<i32>>::Iter
50+
//~^ ERROR unclosed HTML tag `i32`
51+
//~|HELP try marking as source
52+
pub struct NestedGenericsWithPath2;
53+
54+
/// Generics with bump <Vec<i32>>s
55+
//~^ ERROR unclosed HTML tag `i32`
56+
//~|HELP try marking as source
57+
pub struct NestedGenericsWithBump;
58+
59+
/// Generics with bump Vec<Vec<i32>>s
60+
//~^ ERROR unclosed HTML tag `i32`
61+
//~|HELP try marking as source
62+
pub struct NestedGenericsWithBump2;
63+
64+
/// Generics with punct <Vec<i32>>!
65+
//~^ ERROR unclosed HTML tag `i32`
66+
//~|HELP try marking as source
67+
pub struct NestedGenericsWithPunct;
68+
69+
/// Generics with punct Vec<Vec<i32>>!
70+
//~^ ERROR unclosed HTML tag `i32`
71+
//~|HELP try marking as source
72+
pub struct NestedGenericsWithPunct2;

0 commit comments

Comments
 (0)