From 7875ada79cea1194dc9e15acee36ed0700be70e6 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Wed, 15 Jun 2022 19:28:54 +0200 Subject: Add docs on sanitizing urls to autolink --- src/construct/autolink.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/construct') diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 2682878..78003fb 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -38,6 +38,23 @@ //!

<p><a href="https://example.com/alpha%20bravo">https://example.com/alpha%20bravo</a></p>

//! ``` //! +//! There are several cases where incorrect encoding of URLs would, in other +//! languages, result in a parse error. +//! In markdown, there are no errors, and URLs are normalized. +//! In addition, unicode characters are percent encoded +//! ([`sanitize_uri`][sanitize_uri]). +//! For example: +//! +//! ```markdown +//! <https://a👍b%> +//! ``` +//! +//! Yields: +//! +//! ```html +//!

<p><a href="https://a%F0%9F%91%8Db%25">https://a👍b%</a></p>

+//! ``` +//! //! Interestingly, there are a couple of things that are valid autolinks in //! markdown but in HTML would be valid tags, such as `<svg:rect>` and //! `<xml:lang/>`. @@ -73,6 +90,7 @@ //! [text]: crate::content::text //! [autolink_scheme_size_max]: crate::constant::AUTOLINK_SCHEME_SIZE_MAX //! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX +//! [sanitize_uri]: crate::util::sanitize_uri //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element //! //! -- cgit