Rollup merge of #107655 - notriddle:notriddle/small-url-encode, r=GuillaumeGomez

rustdoc: use the same URL escape rules for fragments as for examples

Carries over improvements from #107284
This commit is contained in:
Dylan DPC 2023-02-09 23:18:34 +05:30 committed by GitHub
commit 188dd72b5f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 71 additions and 81 deletions

View file

@ -458,7 +458,7 @@
//! [`Result`] of a collection of each contained value of the original
//! [`Result`] values, or [`Err`] if any of the elements was [`Err`].
//!
//! [impl-FromIterator]: Result#impl-FromIterator%3CResult%3CA%2C%20E%3E%3E-for-Result%3CV%2C%20E%3E
//! [impl-FromIterator]: Result#impl-FromIterator%3CResult%3CA,+E%3E%3E-for-Result%3CV,+E%3E
//!
//! ```
//! let v = [Ok(2), Ok(4), Err("err!"), Ok(8)];
@ -474,8 +474,8 @@
//! to provide the [`product`][Iterator::product] and
//! [`sum`][Iterator::sum] methods.
//!
//! [impl-Product]: Result#impl-Product%3CResult%3CU%2C%20E%3E%3E-for-Result%3CT%2C%20E%3E
//! [impl-Sum]: Result#impl-Sum%3CResult%3CU%2C%20E%3E%3E-for-Result%3CT%2C%20E%3E
//! [impl-Product]: Result#impl-Product%3CResult%3CU,+E%3E%3E-for-Result%3CT,+E%3E
//! [impl-Sum]: Result#impl-Sum%3CResult%3CU,+E%3E%3E-for-Result%3CT,+E%3E
//!
//! ```
//! let v = [Err("error!"), Ok(1), Ok(2), Ok(3), Err("foo")];

View file

@ -46,6 +46,7 @@
use crate::html::format::Buffer;
use crate::html::highlight;
use crate::html::length_limit::HtmlWithLimit;
use crate::html::render::small_url_encode;
use crate::html::toc::TocBuilder;
use pulldown_cmark::{
@ -294,47 +295,7 @@ fn next(&mut self) -> Option<Self::Item> {
doctest::make_test(&test, krate, false, &Default::default(), edition, None);
let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
// These characters don't need to be escaped in a URI.
// See https://url.spec.whatwg.org/#query-percent-encode-set
// and https://url.spec.whatwg.org/#urlencoded-parsing
// and https://url.spec.whatwg.org/#url-code-points
fn dont_escape(c: u8) -> bool {
    // Punctuation bytes that are emitted verbatim, in addition to ASCII
    // letters and digits. `=` is included because, per urlencoded-parsing,
    // only the first `=` splits key from value; any later `=` simply
    // belongs to the value.
    const SAFE_PUNCTUATION: &[u8] = b"-_.,~!'()*/;:?=";

    c.is_ascii_alphanumeric() || SAFE_PUNCTUATION.contains(&c)
}
let mut test_escaped = String::new();
for b in test.bytes() {
if dont_escape(b) {
test_escaped.push(char::from(b));
} else if b == b' ' {
// URL queries are decoded with + replaced with SP
test_escaped.push('+');
} else if b == b'%' {
test_escaped.push('%');
test_escaped.push('%');
} else {
write!(test_escaped, "%{:02X}", b).unwrap();
}
}
let test_escaped = small_url_encode(test);
Some(format!(
r#"<a class="test-arrow" target="_blank" href="{}?code={}{}&amp;edition={}">Run</a>"#,
url, test_escaped, channel, edition,

View file

@ -38,7 +38,7 @@
use std::collections::VecDeque;
use std::default::Default;
use std::fmt;
use std::fmt::{self, Write};
use std::fs;
use std::iter::Peekable;
use std::path::PathBuf;
@ -2020,31 +2020,60 @@ fn get_associated_constants(
.collect::<Vec<_>>()
}
// The point is to URL-encode any character that can appear in a type with generics.
fn small_url_encode(s: String) -> String {
pub(crate) fn small_url_encode(s: String) -> String {
// These characters don't need to be escaped in a URI.
// See https://url.spec.whatwg.org/#query-percent-encode-set
// and https://url.spec.whatwg.org/#urlencoded-parsing
// and https://url.spec.whatwg.org/#url-code-points
fn dont_escape(c: u8) -> bool {
    // Returns `true` for the bytes that are copied through unescaped:
    // ASCII letters and digits plus a fixed set of punctuation. Every
    // other byte (including all non-ASCII bytes) yields `false`.
    matches!(
        c,
        b'a'..=b'z'
            | b'A'..=b'Z'
            | b'0'..=b'9'
            | b'-'
            | b'_'
            | b'.'
            | b','
            | b'~'
            | b'!'
            | b'\''
            | b'('
            | b')'
            | b'*'
            | b'/'
            | b';'
            | b':'
            | b'?'
            // Per urlencoded-parsing, only the first `=` separates the key
            // from the value; subsequent `=`s are part of the value.
            | b'='
    )
}
let mut st = String::new();
let mut last_match = 0;
for (idx, c) in s.char_indices() {
let escaped = match c {
'<' => "%3C",
'>' => "%3E",
' ' => "%20",
'?' => "%3F",
'\'' => "%27",
'&' => "%26",
',' => "%2C",
':' => "%3A",
';' => "%3B",
'[' => "%5B",
']' => "%5D",
'"' => "%22",
_ => continue,
};
for (idx, b) in s.bytes().enumerate() {
if dont_escape(b) {
continue;
}
st += &s[last_match..idx];
st += escaped;
// NOTE: we only expect single byte characters here - which is fine as long as we
// only match single byte characters
if last_match != idx {
// Invariant: `idx` must be the first byte in a character at this point.
st += &s[last_match..idx];
}
if b == b' ' {
// URL queries are decoded with + replaced with SP.
// While the same is not true for hashes, rustdoc only needs to be
// consistent with itself when encoding them.
st += "+";
} else if b == b'%' {
st += "%%";
} else {
write!(st, "%{:02X}", b).unwrap();
}
// Invariant: if the current byte is not at the start of a multi-byte character,
// we need to get down here so that when the next turn of the loop comes around,
// last_match winds up equalling idx.
//
// In other words, dont_escape must always return `false` for every byte of a multi-byte character.
last_match = idx + 1;
}

View file

@ -21,8 +21,8 @@
// 'pub trait Trait<const N: usize>'
// @has - '//*[@id="impl-Trait%3C1%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<1> for u8'
// @has - '//*[@id="impl-Trait%3C2%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<2> for u8'
// @has - '//*[@id="impl-Trait%3C{1%20+%202}%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<{1 + 2}> for u8'
// @has - '//*[@id="impl-Trait%3CN%3E-for-%5Bu8%3B%20N%5D"]//h3[@class="code-header"]' \
// @has - '//*[@id="impl-Trait%3C%7B1+%2B+2%7D%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<{1 + 2}> for u8'
// @has - '//*[@id="impl-Trait%3CN%3E-for-%5Bu8;+N%5D"]//h3[@class="code-header"]' \
// 'impl<const N: usize> Trait<N> for [u8; N]'
pub trait Trait<const N: usize> {}
impl Trait<1> for u8 {}
@ -47,7 +47,7 @@ pub fn hey<const N: usize>(&self) -> Bar<u8, N> {
}
}
// @has foo/struct.Bar.html '//*[@id="impl-Bar%3Cu8%2C%20M%3E"]/h3[@class="code-header"]' 'impl<const M: usize> Bar<u8, M>'
// @has foo/struct.Bar.html '//*[@id="impl-Bar%3Cu8,+M%3E"]/h3[@class="code-header"]' 'impl<const M: usize> Bar<u8, M>'
impl<const M: usize> Bar<u8, M> {
// @has - '//*[@id="method.hey"]' \
// 'pub fn hey<const N: usize>(&self) -> Foo<N>where u8: Trait<N>'

View file

@ -9,20 +9,20 @@ pub enum Order {
}
// @has foo/struct.VSet.html '//pre[@class="rust item-decl"]' 'pub struct VSet<T, const ORDER: Order>'
// @has foo/struct.VSet.html '//*[@id="impl-Send-for-VSet%3CT%2C%20ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Send for VSet<T, ORDER>'
// @has foo/struct.VSet.html '//*[@id="impl-Sync-for-VSet%3CT%2C%20ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Sync for VSet<T, ORDER>'
// @has foo/struct.VSet.html '//*[@id="impl-Send-for-VSet%3CT,+ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Send for VSet<T, ORDER>'
// @has foo/struct.VSet.html '//*[@id="impl-Sync-for-VSet%3CT,+ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Sync for VSet<T, ORDER>'
pub struct VSet<T, const ORDER: Order> {
inner: Vec<T>,
}
// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT%2C%20{%20Order%3A%3ASorted%20}%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Sorted }>'
// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT,+%7B+Order::Sorted+%7D%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Sorted }>'
impl<T> VSet<T, { Order::Sorted }> {
pub fn new() -> Self {
Self { inner: Vec::new() }
}
}
// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT%2C%20{%20Order%3A%3AUnsorted%20}%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Unsorted }>'
// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT,+%7B+Order::Unsorted+%7D%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Unsorted }>'
impl<T> VSet<T, { Order::Unsorted }> {
pub fn new() -> Self {
Self { inner: Vec::new() }
@ -31,7 +31,7 @@ pub fn new() -> Self {
pub struct Escape<const S: &'static str>;
// @has foo/struct.Escape.html '//*[@id="impl-Escape%3Cr#%22%3Cscript%3Ealert(%22Escape%22)%3B%3C/script%3E%22#%3E"]/h3[@class="code-header"]' 'impl Escape<r#"<script>alert("Escape");</script>"#>'
// @has foo/struct.Escape.html '//*[@id="impl-Escape%3Cr%23%22%3Cscript%3Ealert(%22Escape%22);%3C/script%3E%22%23%3E"]/h3[@class="code-header"]' 'impl Escape<r#"<script>alert("Escape");</script>"#>'
impl Escape<r#"<script>alert("Escape");</script>"#> {
pub fn f() {}
}

View file

@ -7,5 +7,5 @@ fn foo() {}
pub struct Bar;
// @has foo/struct.Bar.html
// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo%3Cunsafe%20extern%20%22C%22%20fn()%3E-for-Bar"]' 'Foo<unsafe extern "C" fn()>'
// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo%3Cunsafe+extern+%22C%22+fn()%3E-for-Bar"]' 'Foo<unsafe extern "C" fn()>'
impl Foo<unsafe extern "C" fn()> for Bar {}

View file

@ -6,13 +6,13 @@
pub trait Foo {}
// @has foo/trait.Foo.html
// @has - '//section[@id="impl-Foo-for-(T%2C)"]/h3' 'impl<T> Foo for (T₁, T₂, …, Tₙ)'
// @has - '//section[@id="impl-Foo-for-(T,)"]/h3' 'impl<T> Foo for (T₁, T₂, …, Tₙ)'
#[doc(fake_variadic)]
impl<T> Foo for (T,) {}
pub trait Bar {}
// @has foo/trait.Bar.html
// @has - '//section[@id="impl-Bar-for-(U%2C)"]/h3' 'impl<U: Foo> Bar for (U₁, U₂, …, Uₙ)'
// @has - '//section[@id="impl-Bar-for-(U,)"]/h3' 'impl<U: Foo> Bar for (U₁, U₂, …, Uₙ)'
#[doc(fake_variadic)]
impl<U: Foo> Bar for (U,) {}

View file

@ -7,8 +7,8 @@
// @has - '//h2[@id="foreign-impls"]' 'Implementations on Foreign Types'
// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo-for-u32"]' 'u32'
// @has - '//*[@id="impl-Foo-for-u32"]//h3[@class="code-header"]' 'impl Foo for u32'
// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo-for-%26%27a%20str"]' "&'a str"
// @has - '//*[@id="impl-Foo-for-%26%27a%20str"]//h3[@class="code-header"]' "impl<'a> Foo for &'a str"
// @has - "//*[@class=\"sidebar-elems\"]//section//a[@href=\"#impl-Foo-for-%26'a+str\"]" "&'a str"
// @has - "//*[@id=\"impl-Foo-for-%26'a+str\"]//h3[@class=\"code-header\"]" "impl<'a> Foo for &'a str"
pub trait Foo {}
impl Foo for u32 {}

View file

@ -7,7 +7,7 @@ pub trait SomeTrait<Rhs = Self>
}
// @has 'foo/trait.SomeTrait.html'
// @has - "//*[@id='impl-SomeTrait%3C(A%2C%20B%2C%20C%2C%20D%2C%20E)%3E-for-(A%2C%20B%2C%20C%2C%20D%2C%20E)']/h3" "impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)where A: PartialOrd<A> + PartialEq<A>, B: PartialOrd<B> + PartialEq<B>, C: PartialOrd<C> + PartialEq<C>, D: PartialOrd<D> + PartialEq<D>, E: PartialOrd<E> + PartialEq<E> + ?Sized, "
// @has - "//*[@id='impl-SomeTrait%3C(A,+B,+C,+D,+E)%3E-for-(A,+B,+C,+D,+E)']/h3" "impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)where A: PartialOrd<A> + PartialEq<A>, B: PartialOrd<B> + PartialEq<B>, C: PartialOrd<C> + PartialEq<C>, D: PartialOrd<D> + PartialEq<D>, E: PartialOrd<E> + PartialEq<E> + ?Sized, "
impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)
where
A: PartialOrd<A> + PartialEq<A>,