From 2379c2749916483be68dbf816a4c56cd59ced958 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Mon, 15 Aug 2022 18:22:40 +0200
Subject: Refactor to proof docs, grammars

---
 build.rs                               |   4 +-
 src/compiler.rs                        | 155 +++++++++++++++++++--------------
 src/constant.rs                        |   3 +-
 src/construct/attention.rs             |  31 ++++---
 src/construct/autolink.rs              |  52 +++++++----
 src/construct/blank_line.rs            |  35 ++++++--
 src/construct/block_quote.rs           |  23 +++--
 src/construct/character_escape.rs      |  19 ++--
 src/construct/character_reference.rs   |  27 +++---
 src/construct/code_fenced.rs           |  73 +++++++++-------
 src/construct/code_indented.rs         |  44 ++++++----
 src/construct/code_text.rs             |  32 +++----
 src/construct/definition.rs            |  42 +++++----
 src/construct/document.rs              |   4 +-
 src/construct/hard_break_escape.rs     |  29 +++---
 src/construct/heading_atx.rs           |  32 ++++---
 src/construct/heading_setext.rs        |  40 ++++++---
 src/construct/html_flow.rs             |  48 +++++-----
 src/construct/html_text.rs             |  35 ++++----
 src/construct/label_end.rs             |   2 +-
 src/construct/list_item.rs             |   2 +-
 src/construct/mod.rs                   |  82 ++++++++++++++---
 src/construct/partial_data.rs          |   2 +-
 src/construct/partial_whitespace.rs    |   6 +-
 src/event.rs                           |  14 ++-
 src/parser.rs                          |   2 +-
 src/resolve.rs                         |  46 +++++++++-
 src/state.rs                           |   8 +-
 src/subtokenize.rs                     |  57 ++++++------
 src/tokenizer.rs                       |  90 +++++++++----------
 src/unicode.rs                         |   4 +-
 src/util/decode_character_reference.rs |  10 +--
 src/util/edit_map.rs                   |   4 +-
 src/util/encode.rs                     |   5 +-
 src/util/mod.rs                        |   2 +-
 src/util/normalize_identifier.rs       |  30 +++----
 src/util/sanitize_uri.rs               |  10 ++-
 src/util/skip.rs                       |   2 +-
 src/util/slice.rs                      |   9 +-
 39 files changed, 689 insertions(+), 426 deletions(-)
diff --git a/build.rs b/build.rs
index 1a3549b..7e68124 100644
--- a/build.rs
+++ b/build.rs
@@ -122,9 +122,9 @@ async fn punctuation() {
         .collect::<Vec<_>>();
 
     let doc = format!(
-        "//! Information on Unicode.
+        "//! Info on Unicode.
 
-/// List of characters that are considered punctuation according to Unicode.
+/// List of characters that are considered punctuation.
 ///
 /// > 👉 **Important**: this module is generated by `build.rs`.
 /// > It is generate from the latest Unicode data.
diff --git a/src/compiler.rs b/src/compiler.rs
index f2af8f1..db0df9b 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -19,26 +19,29 @@ struct Media {
     /// Whether this represents an image (`true`) or a link or definition
     /// (`false`).
     image: bool,
-    /// The text between the brackets (`x` in `![x]()` and `[x]()`), as an
-    /// identifier, meaning that the original source characters are used
-    /// instead of interpreting them.
+    /// The text between the brackets (`x` in `![x]()` and `[x]()`).
+    ///
     /// Not interpreted.
     label_id: Option<(usize, usize)>,
-    /// The text between the brackets (`x` in `![x]()` and `[x]()`), as
-    /// interpreted content.
-    /// When this is a link, it can contain further text content and thus HTML
+    /// The result of interpreting the text between the brackets
+    /// (`x` in `![x]()` and `[x]()`).
+    ///
+    /// When this is a link, it contains further text content and thus HTML
     /// tags.
     /// Otherwise, when an image, text content is also allowed, but resulting
     /// tags are ignored.
     label: Option<String>,
-    /// The text between the explicit brackets of the reference (`y` in
+    /// The string between the explicit brackets of the reference (`y` in
     /// `[x][y]`), as content.
+    ///
     /// Not interpreted.
     reference_id: Option<(usize, usize)>,
     /// The destination (url).
+    ///
     /// Interpreted string content.
     destination: Option<String>,
     /// The destination (url).
+    ///
     /// Interpreted string content.
     title: Option<String>,
 }
@@ -46,10 +49,14 @@ struct Media {
 /// Representation of a definition.
 #[derive(Debug)]
 struct Definition {
+    /// Identifier.
+    id: String,
     /// The destination (url).
+    ///
     /// Interpreted string content.
     destination: Option<String>,
     /// The title.
+    ///
     /// Interpreted string content.
     title: Option<String>,
 }
@@ -58,32 +65,55 @@ struct Definition {
 #[allow(clippy::struct_excessive_bools)]
 #[derive(Debug)]
 struct CompileContext<'a> {
-    /// Static info.
+    // Static info.
+    /// List of events.
     pub events: &'a [Event],
+    /// List of bytes.
     pub bytes: &'a [u8],
-    /// Fields used by handlers to track the things they need to track to
-    /// compile markdown.
-    pub atx_opening_sequence_size: Option<usize>,
+    // Fields used by handlers to track the things they need to track to
+    // compile markdown.
+    /// Rank of heading (atx).
+    pub heading_atx_rank: Option<usize>,
+    /// Buffer of heading (setext) text.
     pub heading_setext_buffer: Option<String>,
+    /// Whether code (flow) contains data.
     pub code_flow_seen_data: Option<bool>,
+    /// Number of code (fenced) fences.
     pub code_fenced_fences_count: Option<usize>,
+    /// Whether we are in code (text).
     pub code_text_inside: bool,
+    /// Whether we are in image text.
+    pub image_alt_inside: bool,
+    /// Marker of character reference.
     pub character_reference_marker: Option<u8>,
-    pub expect_first_item: Option<bool>,
+    /// Whether we are expecting the first list item marker.
+    pub list_expect_first_marker: Option<bool>,
+    /// Stack of media (link, image).
     pub media_stack: Vec<Media>,
-    pub definitions: Vec<(String, Definition)>,
+    /// Stack of containers.
     pub tight_stack: Vec<bool>,
-    /// Fields used to influance the current compilation.
+    /// List of definitions.
+    pub definitions: Vec<Definition>,
+    // Fields used to influence the current compilation.
+    /// Ignore the next line ending.
     pub slurp_one_line_ending: bool,
-    pub in_image_alt: bool,
+    /// Whether to encode HTML.
     pub encode_html: bool,
-    /// Configuration
+    // Configuration
+    /// Whether to sanitize `href`s, and in which case, which protocols to
+    /// allow.
     pub protocol_href: Option<Vec<&'static str>>,
+    /// Whether to sanitize `src`s, and in which case, which protocols to
+    /// allow.
     pub protocol_src: Option<Vec<&'static str>>,
+    /// Line ending to use.
     pub line_ending_default: LineEnding,
+    /// Whether to allow HTML.
     pub allow_dangerous_html: bool,
-    /// Intermediate results.
+    // Intermediate results.
+    /// Stack of buffers.
     pub buffers: Vec<String>,
+    /// Current event index.
     pub index: usize,
 }
 
@@ -98,18 +128,18 @@ impl<'a> CompileContext<'a> {
         CompileContext {
             events,
             bytes,
-            atx_opening_sequence_size: None,
+            heading_atx_rank: None,
             heading_setext_buffer: None,
             code_flow_seen_data: None,
             code_fenced_fences_count: None,
             code_text_inside: false,
             character_reference_marker: None,
-            expect_first_item: None,
+            list_expect_first_marker: None,
             media_stack: vec![],
             definitions: vec![],
             tight_stack: vec![],
             slurp_one_line_ending: false,
-            in_image_alt: false,
+            image_alt_inside: false,
             encode_html: true,
             protocol_href: if options.allow_dangerous_protocol {
                 None
@@ -258,7 +288,7 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String {
         .to_string()
 }
 
-// Handle the event at `index`.
+/// Handle the event at `index`.
 fn handle(context: &mut CompileContext, index: usize) {
     context.index = index;
 
@@ -389,7 +419,7 @@ fn on_enter_code_fenced(context: &mut CompileContext) {
 /// Handle [`Enter`][Kind::Enter]:[`CodeText`][Name::CodeText].
 fn on_enter_code_text(context: &mut CompileContext) {
     context.code_text_inside = true;
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("<code>");
     }
     context.buffer();
@@ -416,7 +446,7 @@ fn on_enter_definition_destination_string(context: &mut CompileContext) {
 
 /// Handle [`Enter`][Kind::Enter]:[`Emphasis`][Name::Emphasis].
 fn on_enter_emphasis(context: &mut CompileContext) {
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("<em>");
     }
 }
@@ -446,7 +476,7 @@ fn on_enter_image(context: &mut CompileContext) {
         destination: None,
         title: None,
     });
-    context.in_image_alt = true; // Disallow tags.
+    context.image_alt_inside = true; // Disallow tags.
 }
 
 /// Handle [`Enter`][Kind::Enter]:[`Link`][Name::Link].
@@ -556,21 +586,19 @@ fn on_enter_list(context: &mut CompileContext) {
     } else {
         "<ul"
     });
-    context.expect_first_item = Some(true);
+    context.list_expect_first_marker = Some(true);
 }
 
 /// Handle [`Enter`][Kind::Enter]:[`ListItemMarker`][Name::ListItemMarker].
 fn on_enter_list_item_marker(context: &mut CompileContext) {
-    let expect_first_item = context.expect_first_item.take().unwrap();
-
-    if expect_first_item {
+    if context.list_expect_first_marker.take().unwrap() {
         context.push(">");
     }
 
     context.line_ending_if_needed();
 
     context.push("<li>");
-    context.expect_first_item = Some(false);
+    context.list_expect_first_marker = Some(false);
 }
 
 /// Handle [`Enter`][Kind::Enter]:[`Paragraph`][Name::Paragraph].
@@ -599,7 +627,7 @@ fn on_enter_resource_destination_string(context: &mut CompileContext) {
 
 /// Handle [`Enter`][Kind::Enter]:[`Strong`][Name::Strong].
 fn on_enter_strong(context: &mut CompileContext) {
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("<strong>");
     }
 }
@@ -612,7 +640,7 @@ fn on_exit_autolink_email(context: &mut CompileContext) {
     );
     let value = slice.as_str();
 
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("<a href=\"");
         context.push(&sanitize_uri(
             &format!("mailto:{}", value),
@@ -623,7 +651,7 @@ fn on_exit_autolink_email(context: &mut CompileContext) {
 
     context.push(&encode(value, context.encode_html));
 
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("</a>");
     }
 }
@@ -636,7 +664,7 @@ fn on_exit_autolink_protocol(context: &mut CompileContext) {
     );
     let value = slice.as_str();
 
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("<a href=\"");
         context.push(&sanitize_uri(value, &context.protocol_href));
         context.push("\">");
@@ -644,14 +672,14 @@ fn on_exit_autolink_protocol(context: &mut CompileContext) {
 
     context.push(&encode(value, context.encode_html));
 
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("</a>");
     }
 }
 
 /// Handle [`Exit`][Kind::Exit]:{[`HardBreakEscape`][Name::HardBreakEscape],[`HardBreakTrailing`][Name::HardBreakTrailing]}.
 fn on_exit_break(context: &mut CompileContext) {
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("<br />");
     }
 }
@@ -748,11 +776,6 @@ fn on_exit_code_fenced_fence_info(context: &mut CompileContext) {
 
 /// Handle [`Exit`][Kind::Exit]:{[`CodeFenced`][Name::CodeFenced],[`CodeIndented`][Name::CodeIndented]}.
 fn on_exit_code_flow(context: &mut CompileContext) {
-    let seen_data = context
-        .code_flow_seen_data
-        .take()
-        .expect("`code_flow_seen_data` must be defined");
-
     // One special case is if we are inside a container, and the fenced code was
     // not closed (meaning it runs to the end).
     // In that case, the following line ending, is considered *outside* the
@@ -772,7 +795,11 @@ fn on_exit_code_flow(context: &mut CompileContext) {
 
     // But in most cases, it’s simpler: when we’ve seen some data, emit an extra
     // line ending when needed.
-    if seen_data {
+    if context
+        .code_flow_seen_data
+        .take()
+        .expect("`code_flow_seen_data` must be defined")
+    {
         context.line_ending_if_needed();
     }
 
@@ -814,7 +841,7 @@ fn on_exit_code_text(context: &mut CompileContext) {
     context.code_text_inside = false;
     context.push(str::from_utf8(bytes).unwrap());
 
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("</code>");
     }
 }
@@ -846,13 +873,11 @@ fn on_exit_definition(context: &mut CompileContext) {
     let id =
         normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str());
 
-    context.definitions.push((
+    context.definitions.push(Definition {
         id,
-        Definition {
-            destination: media.destination,
-            title: media.title,
-        },
-    ));
+        destination: media.destination,
+        title: media.title,
+    });
 }
 
 /// Handle [`Exit`][Kind::Exit]:[`DefinitionDestinationString`][Name::DefinitionDestinationString].
@@ -878,7 +903,7 @@ fn on_exit_definition_title_string(context: &mut CompileContext) {
 
 /// Handle [`Exit`][Kind::Exit]:[`Strong`][Name::Emphasis].
 fn on_exit_emphasis(context: &mut CompileContext) {
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("</em>");
     }
 }
@@ -886,9 +911,9 @@ fn on_exit_emphasis(context: &mut CompileContext) {
 /// Handle [`Exit`][Kind::Exit]:[`HeadingAtx`][Name::HeadingAtx].
 fn on_exit_heading_atx(context: &mut CompileContext) {
     let rank = context
-        .atx_opening_sequence_size
+        .heading_atx_rank
         .take()
-        .expect("`atx_opening_sequence_size` must be set in headings");
+        .expect("`heading_atx_rank` must be set in headings");
 
     context.push("</h");
     context.push(&rank.to_string());
@@ -898,14 +923,14 @@ fn on_exit_heading_atx(context: &mut CompileContext) {
 /// Handle [`Exit`][Kind::Exit]:[`HeadingAtxSequence`][Name::HeadingAtxSequence].
 fn on_exit_heading_atx_sequence(context: &mut CompileContext) {
     // First fence we see.
-    if context.atx_opening_sequence_size.is_none() {
+    if context.heading_atx_rank.is_none() {
         let rank = Slice::from_position(
             context.bytes,
             &Position::from_exit_event(context.events, context.index),
         )
         .len();
         context.line_ending_if_needed();
-        context.atx_opening_sequence_size = Some(rank);
+        context.heading_atx_rank = Some(rank);
         context.push("<h");
         context.push(&rank.to_string());
         context.push(">");
@@ -930,7 +955,7 @@ fn on_exit_heading_setext_underline(context: &mut CompileContext) {
     let text = context
         .heading_setext_buffer
         .take()
-        .expect("`atx_opening_sequence_size` must be set in headings");
+        .expect("`heading_atx_rank` must be set in headings");
     let head = Slice::from_position(
         context.bytes,
         &Position::from_exit_event(context.events, context.index),
@@ -1034,9 +1059,7 @@ fn on_exit_list_item(context: &mut CompileContext) {
 
 /// Handle [`Exit`][Kind::Exit]:[`ListItemValue`][Name::ListItemValue].
 fn on_exit_list_item_value(context: &mut CompileContext) {
-    let expect_first_item = context.expect_first_item.unwrap();
-
-    if expect_first_item {
+    if context.list_expect_first_marker.unwrap() {
         let slice = Slice::from_position(
             context.bytes,
             &Position::from_exit_event(context.events, context.index),
@@ -1066,11 +1089,11 @@ fn on_exit_media(context: &mut CompileContext) {
         index += 1;
     }
 
-    context.in_image_alt = is_in_image;
+    context.image_alt_inside = is_in_image;
 
     let media = context.media_stack.pop().unwrap();
     let label = media.label.unwrap();
-    let in_image_alt = context.in_image_alt;
+    let image_alt_inside = context.image_alt_inside;
     let id = media.reference_id.or(media.label_id).map(|indices| {
         normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str())
     });
@@ -1080,7 +1103,7 @@ fn on_exit_media(context: &mut CompileContext) {
             let mut index = 0;
 
             while index < context.definitions.len() {
-                if context.definitions[index].0 == id {
+                if context.definitions[index].id == id {
                     return Some(index);
                 }
 
@@ -1093,7 +1116,7 @@ fn on_exit_media(context: &mut CompileContext) {
         None
     };
 
-    if !in_image_alt {
+    if !image_alt_inside {
         if media.image {
             context.push("<img src=\"");
         } else {
@@ -1101,7 +1124,7 @@ fn on_exit_media(context: &mut CompileContext) {
         };
 
         let destination = if let Some(index) = definition_index {
-            context.definitions[index].1.destination.as_ref()
+            context.definitions[index].destination.as_ref()
         } else {
             media.destination.as_ref()
         };
@@ -1126,11 +1149,11 @@ fn on_exit_media(context: &mut CompileContext) {
         context.push(&label);
     }
 
-    if !in_image_alt {
+    if !image_alt_inside {
         context.push("\"");
 
         let title = if let Some(index) = definition_index {
-            context.definitions[index].1.title.clone()
+            context.definitions[index].title.clone()
         } else {
             media.title
         };
@@ -1151,7 +1174,7 @@ fn on_exit_media(context: &mut CompileContext) {
     if !media.image {
         context.push(&label);
 
-        if !in_image_alt {
+        if !image_alt_inside {
             context.push("</a>");
         }
     }
@@ -1192,7 +1215,7 @@ fn on_exit_resource_title_string(context: &mut CompileContext) {
 
 /// Handle [`Exit`][Kind::Exit]:[`Strong`][Name::Strong].
 fn on_exit_strong(context: &mut CompileContext) {
-    if !context.in_image_alt {
+    if !context.image_alt_inside {
         context.push("</strong>");
     }
 }
diff --git a/src/constant.rs b/src/constant.rs
index 47cb50c..b856fd0 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -254,8 +254,7 @@ pub const THEMATIC_BREAK_MARKER_COUNT_MIN: usize = 3;
 
 // Important: please touch the below lists as few times as possible to keep Git small.
 
-/// List of names that can form named [character reference][character_reference]s
-/// and corresponding values.
+/// List of names and values that form named [character reference][character_reference]s.
 ///
 /// This list is sensitive to casing.
 ///
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 1dc8868..21407b7 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -1,22 +1,29 @@
-//! Attention is a construct that occurs in the [text][] content type.
+//! Attention (emphasis and strong) occurs in the [text][] content type.
 //!
-//! How attention parses is too complex to explain in BNF.
-//! Essentially, one or more of `*` or `_` form attention sequences.
-//! Depending on the code before and after a sequence, it can open or close
-//! attention.
-//! When everything is parsed, we find each sequence that can close, and a
-//! corresponding sequence that can open which uses the same marker.
-//! If both sequences have two or more markers, strong is formed.
-//! Otherwise emphasis is formed.
+//! ## Grammar
 //!
-//! Attention sequences do not, on their own, relate to anything in HTML.
-//! When matched with another sequence, and two markers can be “taken” from
-//! them, they together relate to the `<strong>` element in HTML.
+//! Attention sequences form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! attention_sequence ::= 1*'*' | 1*'_'
+//! ```
+//!
+//! Sequences are matched together to form attention based on which character
+//! they contain, and what character occurs before and after each sequence.
+//! Otherwise they are turned into data.
+//!
+//! ## HTML
+//!
+//! When sequences match, and two markers can be “taken” from them, they
+//! together relate to the `<strong>` element in HTML.
 //! When one marker can be taken, they relate to the `<em>` element.
 //! See [*§ 4.5.2 The `em` element*][html-em] and
 //! [*§ 4.5.3 The `strong` element*][html-strong] in the HTML spec for more
 //! info.
 //!
+//! ## Recommendation
+//!
 //! It is recommended to use asterisks for attention when writing markdown.
 //!
 //! There are some small differences in whether sequences can open and/or close
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 37e21d9..9890aaf 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -1,22 +1,24 @@
-//! Autolinks are a construct that occurs in the [text][] content type.
+//! Autolinks occur in the [text][] content type.
 //!
-//! It forms with the following BNF:
+//! ## Grammar
+//!
+//! Autolinks form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
-//! autolink ::= '<' ( url | email ) '>'
+//! autolink ::= '<' (url | email) '>'
+//!
+//! url ::= protocol *url_byte
+//! protocol ::= ascii_alphabetic 0*31(protocol_byte) ':'
+//! protocol_byte ::= '+' | '-' | '.' | ascii_alphanumeric
+//! url_byte ::= byte - ascii_control - ' '
 //!
-//! url ::= ascii_alphabetic 0*31( '+' '-' '.' ascii_alphanumeric ) ':' *( code - ascii_control - '\r' - '\n' - ' ')
-//! email ::= 1*ascii_atext '@' domain *('.' domain)
+//! email ::= 1*ascii_atext '@' email_domain *('.' email_domain)
 //! ; Restriction: up to (including) 63 character are allowed in each domain.
-//! domain ::= ascii_alphanumeric *( ascii_alphanumeric | '-' ascii_alphanumeric )
-//! ascii_atext ::= ascii_alphanumeric | '#' .. '\'' | '*' | '+' | '-' | '/' | '=' | '?' | '^' .. '`' | '{' .. '~'
-//! ```
+//! email_domain ::= ascii_alphanumeric *(ascii_alphanumeric | '-' ascii_alphanumeric)
 //!
-//! Autolinks relate to the `<a>` element in HTML.
-//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info.
-//! When an email autolink is used (so, without a protocol), the string
-//! `mailto:` is prepended before the email, when generating the `href`
-//! attribute of the hyperlink.
+//! ascii_atext ::= ascii_alphanumeric | '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '/' | '=' | '?' | '^' | '_' | '`' | '{' | '|' | '}' | '~'
+//! ```
 //!
 //! The maximum allowed size of a scheme is `31` (inclusive), which is defined
 //! in [`AUTOLINK_SCHEME_SIZE_MAX`][autolink_scheme_size_max].
@@ -41,7 +43,7 @@
 //! There are several cases where incorrect encoding of URLs would, in other
 //! languages, result in a parse error.
 //! In markdown, there are no errors, and URLs are normalized.
-//! In addition, unicode characters are percent encoded
+//! In addition, many characters are percent encoded
 //! ([`sanitize_uri`][sanitize_uri]).
 //! For example:
 //!
@@ -82,6 +84,22 @@
 //! <p><a href="#"></a><a href="https://example.com">https://example.com</a></p>
 //! ```
 //!
+//! ## HTML
+//!
+//! Autolinks relate to the `<a>` element in HTML.
+//! See [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info.
+//! When an email autolink is used (so, without a protocol), the string
+//! `mailto:` is prepended before the email, when generating the `href`
+//! attribute of the hyperlink.
+//!
+//! ## Recommendation
+//!
+//! It is recommended to use labels ([label start link][label_start_link],
+//! [label end][label_end]), either with a resource or a definition
+//! ([definition][]), instead of autolinks, as those allow more characters in
+//! URLs, and allow relative URLs and `www.` URLs.
+//! They also allow for descriptive text to explain the URL in prose.
+//!
 //! ## Tokens
 //!
 //! *   [`Autolink`][Name::Autolink]
@@ -95,11 +113,13 @@
 //! *   [*§ 6.4 Autolinks* in `CommonMark`](https://spec.commonmark.org/0.30/#autolinks)
 //!
 //! [text]: crate::construct::text
+//! [definition]: crate::construct::definition
+//! [label_start_link]: crate::construct::label_start_link
 //! [label_end]: crate::construct::label_end
 //! [autolink_scheme_size_max]: crate::constant::AUTOLINK_SCHEME_SIZE_MAX
 //! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX
 //! [sanitize_uri]: crate::util::sanitize_uri
-//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
+//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
 
 use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
 use crate::event::Name;
@@ -293,7 +313,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State {
             tokenizer.tokenize_state.size = 0;
             let index = tokenizer.events.len();
             tokenizer.exit(Name::AutolinkProtocol);
-            // Change the token type.
+            // Change the event name.
             tokenizer.events[index - 1].name = Name::AutolinkEmail;
             tokenizer.events[index].name = Name::AutolinkEmail;
             tokenizer.enter(Name::AutolinkMarker);
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 928b8cc..5be406d 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -1,22 +1,39 @@
-//! Blank lines are a construct that occurs in the [flow][] content type.
+//! Blank lines occur in the [flow][] content type.
 //!
-//! They’re formed with the following BNF:
+//! ## Grammar
+//!
+//! Blank lines form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
-//! blank_line ::= *(' ' '\t')
+//! blank_line ::= *space_or_tab
 //! ```
 //!
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
+//!
 //! Blank lines are sometimes needed, such as to differentiate a [paragraph][]
 //! from another paragraph.
 //! In several cases, blank lines are not needed between flow constructs,
-//! such as between two [heading (atx)][heading-atx]s.
+//! such as between two [heading (atx)][heading_atx]s.
 //! Sometimes, whether blank lines are present, changes the behavior of how
-//! HTML is rendered, such as whether blank lines are present between list
-//! items in a [list][list-item].
+//! HTML is rendered, such as whether blank lines are present inside or between
+//! [list items][list_item].
 //! More than one blank line is never needed in `CommonMark`.
 //!
 //! Because blank lines can be empty (line endings are not considered part of
-//! it), and events cannot be empty, blank lines are not present as a token.
+//! it), and events cannot be empty, blank lines are not present as an event.
+//!
+//! ## HTML
+//!
+//! Blank lines do not relate to an element in HTML, except for the role they play
+//! when inside or between [list items][list_item].
+//!
+//! ## Recommendation
+//!
+//! It is recommended to always use a blank line between every flow construct,
+//! to use blank lines (consistently) between list items as desired, and to
+//! never use more than one blank line.
 //!
 //! ## Tokens
 //!
@@ -27,8 +44,8 @@
 //! *   [`blank-line.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/blank-line.js)
 //! *   [*§ 4.9 Blank lines* in `CommonMark`](https://spec.commonmark.org/0.30/#blank-lines)
 //!
-//! [heading-atx]: crate::construct::heading_atx
-//! [list-item]: crate::construct::list_item
+//! [heading_atx]: crate::construct::heading_atx
+//! [list_item]: crate::construct::list_item
 //! [paragraph]: crate::construct::paragraph
 //! [flow]: crate::construct::flow
 
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 37726c5..8d7e227 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -1,6 +1,9 @@
-//! Block quote is a construct that occurs in the [document][] content type.
+//! Block quotes occur in the [document][] content type.
 //!
-//! It forms with the following BNF:
+//! ## Grammar
+//!
+//! Block quotes form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
 //! block_quote_start ::= '>' [ space_or_tab ]
@@ -9,14 +12,24 @@
 //!
 //! Further lines that are not prefixed with `block_quote_cont` cause the block
 //! quote to be exited, except when those lines are lazy continuation.
-//! Like so many things in markdown, block quotes too, are very complex.
-//! See [*§ Phase 1: block structure*][commonmark-block] for more on parsing
-//! details.
+//! Like so many things in markdown, block quotes too, are complex.
+//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark-block] for
+//! more on parsing details.
+//!
+//! As block quote is a container, it takes several bytes from the start of the
+//! line, while the rest of the line includes more containers or flow.
+//!
+//! ## HTML
 //!
 //! Block quote relates to the `<blockquote>` element in HTML.
 //! See [*§ 4.4.4 The `blockquote` element*][html-blockquote] in the HTML spec
 //! for more info.
 //!
+//! ## Recommendation
+//!
+//! Always use a single space after a block quote marker (`>`).
+//! Never use lazy continuation.
+//!
 //! ## Tokens
 //!
 //! *   [`BlockQuote`][Name::BlockQuote]
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 6dac458..438092e 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -1,7 +1,9 @@
-//! Character escapes are a construct that occurs in the [string][] and
-//! [text][] content types.
+//! Character escapes occur in the [string][] and [text][] content types.
 //!
-//! They’re formed with the following BNF:
+//! ## Grammar
+//!
+//! Character escapes form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
 //! character_escape ::= '\\' ascii_punctuation
@@ -10,13 +12,20 @@
 //! Like much of markdown, there are no “invalid” character escapes: just a
 //! slash, or a slash followed by anything other than an ASCII punctuation
 //! character, is exactly that: just a slash.
-//! To escape (most) arbitrary characters, use a
-//! [character reference][character_reference] instead
+//!
+//! To escape (almost all) arbitrary characters, rather than only ASCII
+//! punctuation, use a [character reference][character_reference] instead
 //! (as in, `&amp;`, `&#123;`, or say `&#x9;`).
+//!
 //! It is also possible to escape a line ending in text with a similar
 //! construct: a [hard break (escape)][hard_break_escape] is a backslash followed
 //! by a line ending (that is part of the construct instead of ending it).
 //!
+//! ## Recommendation
+//!
+//! If possible, use a character escape.
+//! Otherwise, use a character reference.
+//!
 //! ## Tokens
 //!
 //! *   [`CharacterEscape`][Name::CharacterEscape]
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 7935109..3bdc636 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -1,25 +1,27 @@
-//! Character references are a construct that occurs in the [string][] and
-//! [text][] content types.
+//! Character references occur in the [string][] and [text][] content types.
 //!
-//! They’re formed with the following BNF:
+//! ## Grammar
+//!
+//! Character references form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
 //! character_reference ::= '&' (numeric | named) ';'
 //!
 //! numeric ::= '#' (hexadecimal | decimal)
-//! ; Note: Limit of `6` imposed as all bigger numbers are invalid:
+//! ; Note: Limit of `6` imposed, as all bigger numbers are invalid.
 //! hexadecimal ::= ('x' | 'X') 1*6(ascii_hexdigit)
-//! ; Note: Limit of `7` imposed as all bigger numbers are invalid:
+//! ; Note: Limit of `7` imposed, as all bigger numbers are invalid.
 //! decimal ::= 1*7(ascii_digit)
-//! ; Note: Limit of `31` imposed by `CounterClockwiseContourIntegral`:
+//! ; Note: Limit of `31` imposed, for `CounterClockwiseContourIntegral`.
 //! ; Note: Limited to any known named character reference (see `constants.rs`)
 //! named ::= 1*31(ascii_alphanumeric)
 //! ```
 //!
 //! Like much of markdown, there are no “invalid” character references.
 //! However, for security reasons, several numeric character references parse
-//! fine but are not rendered as their corresponding character and they are
-//! instead replaced by a U+FFFD REPLACEMENT CHARACTER (`�`).
+//! fine but are not rendered as their corresponding character.
+//! They are instead replaced by a U+FFFD REPLACEMENT CHARACTER (`�`).
 //! See [`decode_numeric`][decode_numeric] for more info.
 //!
 //! To escape ASCII punctuation characters, use the terser
@@ -33,13 +35,18 @@
 //!
 //! Character references are parsed insensitive to casing.
 //! The casing of hexadecimal numeric character references has no effect.
-//! The casing of named character references does not matter when parsing them,
-//! but does affect whether they match.
+//! The casing of named character references does not matter when parsing, but
+//! does affect whether they match.
 //! Depending on the name, one or more cases are allowed, such as that `AMP`
 //! and `amp` are both allowed but other cases are not.
 //! See [`CHARACTER_REFERENCES`][character_references] for which
 //! names match.
 //!
+//! ## Recommendation
+//!
+//! If possible, use a character escape.
+//! Otherwise, use a character reference.
+//!
 //! ## Tokens
 //!
 //! *   [`CharacterReference`][Name::CharacterReference]
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 3812d44..748e38f 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -1,9 +1,12 @@
-//! Code (fenced) is a construct that occurs in the [flow][] content type.
+//! Code (fenced) occurs in the [flow][] content type.
 //!
-//! It forms with the following BNF:
+//! ## Grammar
+//!
+//! Code (fenced) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
-//! code_fenced ::= fence_open *( eol *code ) [ eol fence_close ]
+//! code_fenced ::= fence_open *( eol *byte ) [ eol fence_close ]
 //!
 //! fence_open ::= sequence [ 1*space_or_tab info [ 1*space_or_tab meta ] ] *space_or_tab
 //! ; Restriction: the number of markers in the closing fence sequence must be
@@ -13,41 +16,53 @@
 //! ; marker in the opening fence sequence
 //! fence_close ::= sequence *space_or_tab
 //! sequence ::= 3*'`' | 3*'~'
+//! ; Restriction: the `` ` `` character cannot occur in `info` if it is the marker.
 //! info ::= 1*text
+//! ; Restriction: the `` ` `` character cannot occur in `meta` if it is the marker.
 //! meta ::= 1*text *( *space_or_tab 1*text )
-//!
-//! ; Restriction: the `` ` `` character cannot occur in `text` if it is the
-//! ; marker of the opening fence sequence.
-//! text ::= code - eol - space_or_tab
-//! eol ::= '\r' | '\r\n' | '\n'
-//! space_or_tab ::= ' ' | '\t'
-//! code ::= . ; any unicode code point (other than line endings).
 //! ```
 //!
-//! The above grammar does not show how whitespace is handled.
-//! To parse code (fenced), let `X` be the number of whitespace characters
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
+//!
+//! The above grammar does not show how indentation (with `space_or_tab`) of
+//! each line is handled.
+//! To parse code (fenced), let `x` be the number of `space_or_tab` characters
 //! before the opening fence sequence.
 //! Each line of text is then allowed (not required) to be indented with up
-//! to `X` spaces or tabs, which are then ignored as an indent instead of being
+//! to `x` spaces or tabs, which are then ignored as an indent instead of being
 //! considered as part of the code.
 //! This indent does not affect the closing fence.
 //! It can be indented up to a separate 3 spaces or tabs.
 //! A bigger indent makes it part of the code instead of a fence.
 //!
-//! Code (fenced) relates to both the `<pre>` and the `<code>` elements in
-//! HTML.
-//! See [*§ 4.4.3 The `pre` element*][html-pre] and the [*§ 4.5.15 The `code`
-//! element*][html-code] in the HTML spec for more info.
+//! The `info` and `meta` parts are interpreted as the [string][] content type.
+//! That means that [character escapes][character_escape] and
+//! [character references][character_reference] are allowed.
 //!
 //! The optional `meta` part is ignored: it is not used when parsing or
 //! rendering.
+//!
 //! The optional `info` part is used and is expected to specify the programming
 //! language that the code is in.
 //! Which value it holds depends on what your syntax highlighter supports, if
 //! one is used.
+//!
+//! In markdown, it is also possible to use [code (text)][code_text] in the
+//! [text][] content type.
+//! It is also possible to create code with the
+//! [code (indented)][code_indented] construct.
+//!
+//! ## HTML
+//!
+//! Code (fenced) relates to both the `<pre>` and the `<code>` elements in
+//! HTML.
+//! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code`
+//! element*][html_code] in the HTML spec for more info.
+//!
 //! The `info` is, when rendering to HTML, typically exposed as a class.
 //! This behavior stems from the HTML spec ([*§ 4.5.15 The `code`
-//! element*][html-code]).
+//! element*][html_code]).
 //! For example:
 //!
 //! ```markdown
@@ -63,17 +78,11 @@
 //! </code></pre>
 //! ```
 //!
-//! The `info` and `meta` parts are interpreted as the [string][] content type.
-//! That means that [character escapes][character_escape] and
-//! [character references][character_reference] are allowed.
+//! ## Recommendation
 //!
-//! In markdown, it is also possible to use [code (text)][code_text] in the
-//! [text][] content type.
-//! It is also possible to create code with the
-//! [code (indented)][code_indented] construct.
-//! That construct is less explicit, different from code (text), and has no
-//! support for specifying the programming language, so it is recommended to
-//! use code (fenced) instead of code (indented).
+//! It is recommended to use code (fenced) instead of code (indented).
+//! Code (fenced) is more explicit, similar to code (text), and has support
+//! for specifying the programming language.
 //!
 //! ## Tokens
 //!
@@ -94,12 +103,12 @@
 //! [flow]: crate::construct::flow
 //! [string]: crate::construct::string
 //! [text]: crate::construct::text
-//! [code_indented]: crate::construct::code_indented
-//! [code_text]: crate::construct::code_text
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
-//! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
-//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//! [code_indented]: crate::construct::code_indented
+//! [code_text]: crate::construct::code_text
+//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 
 use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
 use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index e3a5333..89c5652 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -1,30 +1,38 @@
-//! Code (indented) is a construct that occurs in the [flow][] content type.
+//! Code (indented) occurs in the [flow][] content type.
 //!
-//! It forms with the following BNF:
+//! ## Grammar
+//!
+//! Code (indented) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
-//! code_indented ::= indented_filled_line *( eol *( blank_line eol ) indented_filled_line )
+//! code_indented ::= filled_line *( eol *( blank_line eol ) filled_line )
 //!
-//! ; Restriction: at least one `code` must not be whitespace.
-//! indented_filled_line ::= 4space_or_tab *code
+//! ; Restriction: at least one `line` byte must be `text`.
+//! filled_line ::= 4(space_or_tab) *line
 //! blank_line ::= *space_or_tab
-//! eol ::= '\r' | '\r\n' | '\n'
-//! code ::= . ; any unicode code point (other than line endings).
-//! space_or_tab ::= ' ' | '\t'
 //! ```
 //!
-//! Code (indented) relates to both the `<pre>` and the `<code>` elements in
-//! HTML.
-//! See [*§ 4.4.3 The `pre` element*][html-pre] and the [*§ 4.5.15 The `code`
-//! element*][html-code] in the HTML spec for more info.
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
 //!
 //! In markdown, it is also possible to use [code (text)][code_text] in the
 //! [text][] content type.
 //! It is also possible to create code with the [code (fenced)][code_fenced]
 //! construct.
-//! That construct is more explicit, more similar to code (text), and has
-//! support for specifying the programming language that the code is in, so it
-//! is recommended to use that instead of indented code.
+//!
+//! ## HTML
+//!
+//! Code (indented) relates to both the `<pre>` and the `<code>` elements in
+//! HTML.
+//! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code`
+//! element*][html_code] in the HTML spec for more info.
+//!
+//! ## Recommendation
+//!
+//! It is recommended to use code (fenced) instead of code (indented).
+//! Code (fenced) is more explicit, similar to code (text), and has support
+//! for specifying the programming language.
 //!
 //! ## Tokens
 //!
@@ -40,10 +48,10 @@
 //!
 //! [flow]: crate::construct::flow
 //! [text]: crate::construct::text
-//! [code_text]: crate::construct::code_text
 //! [code_fenced]: crate::construct::code_fenced
-//! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
-//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//! [code_text]: crate::construct::code_text
+//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 
 use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::constant::TAB_SIZE;
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 7ebee96..413b5ee 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -1,12 +1,16 @@
-//! Code (text) is a construct that occurs in the [text][] content type.
+//! Code (text) occurs in the [text][] content type.
 //!
-//! It forms with the following BNF:
+//! ## Grammar
+//!
+//! Code (text) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
 //! ; Restriction: the number of markers in the closing sequence must be equal
 //! ; to the number of markers in the opening sequence.
-//! code_text ::= sequence 1*code sequence
+//! code_text ::= sequence 1*byte sequence
 //!
+//! ; Restriction: not preceded or followed by `` ` ``.
 //! sequence ::= 1*'`'
 //! ```
 //!
@@ -18,15 +22,13 @@
 //! Include more: `a``b` or include less: ``a`b``.
 //! ```
 //!
-//! When turning markdown into HTML, each line ending is turned into a space.
-//!
 //! It is also possible to include just one grave accent (tick):
 //!
 //! ```markdown
 //! Include just one: `` ` ``.
 //! ```
 //!
-//! Sequences are “gready”, in that they cannot be preceded or succeeded by
+//! Sequences are “greedy”, in that they cannot be preceded or followed by
 //! more grave accents (ticks).
 //! To illustrate:
 //!
@@ -53,17 +55,17 @@
 //! if both exist and there is also a non-space in the code, are removed.
 //! Line endings, at that stage, are considered as spaces.
 //!
-//! Code (text) relates to the `<code>` element in HTML.
-//! See [*§ 4.5.15 The `code` element*][html-code] in the HTML spec for more
-//! info.
-//!
 //! In markdown, it is possible to create code with the
 //! [code (fenced)][code_fenced] or [code (indented)][code_indented] constructs
 //! in the [flow][] content type.
-//! Compared to code (indented), fenced code is more explicit and more similar
-//! to code (text), and it has support for specifying the programming language
-//! that the code is in, so it is recommended to use that instead of indented
-//! code.
+//!
+//! ## HTML
+//!
+//! Code (text) relates to the `<code>` element in HTML.
+//! See [*§ 4.5.15 The `code` element*][html_code] in the HTML spec for more
+//! info.
+//!
+//! When turning markdown into HTML, each line ending is turned into a space.
 //!
 //! ## Tokens
 //!
@@ -81,7 +83,7 @@
 //! [text]: crate::construct::text
 //! [code_indented]: crate::construct::code_indented
 //! [code_fenced]: crate::construct::code_fenced
-//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 
 use crate::event::Name;
 use crate::state::{Name as StateName, State};
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 8f274ee..071e595 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -1,31 +1,29 @@
-//! Definition is a construct that occurs in the [flow] content type.
+//! Definition occurs in the [flow][] content type.
 //!
-//! They’re formed with the following BNF:
+//! ## Grammar
+//!
+//! Definition forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
-//! definition ::= label ':' [ whitespace ] destination [ whitespace title ] [ space_or_tab ]
+//! definition ::= label ':' [ space_or_tab_eol ] destination [ space_or_tab_eol title ] [ space_or_tab ]
 //!
 //! ; See the `destination`, `title`, and `label` constructs for the BNF of
 //! ; those parts.
 //! ```
 //!
-//! See [`destination`][destination], [`label`][label], and [`title`][title]
-//! for grammar, notes, and recommendations.
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
 //!
-//! Definitions in markdown do not, on their own, relate to anything in HTML.
-//! When matched with a [label end (reference)][label_end], they together
-//! relate to the `<a>` or `<img>` elements in HTML.
-//! The definition forms its `href` or `src`, and optionally `title`,
-//! attributes.
-//! See [*§ 4.5.1 The `a` element*][html-a] and
-//! [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info.
+//! See [`destination`][destination], [`label`][label], and [`title`][title]
+//! for grammar, notes, and recommendations on each part.
 //!
 //! The `destination`, `label`, and `title` parts are interpreted as the
 //! [string][] content type.
 //! That means that [character escapes][character_escape] and
 //! [character references][character_reference] are allowed.
 //!
-//! Definitions match to references through their label.
+//! Definitions match to references through identifiers.
 //! To match, both labels must be equal after normalizing with
 //! [`normalize_identifier`][normalize_identifier].
 //! One definition can match to multiple references.
@@ -57,6 +55,16 @@
 //! `<img>` when compiling, see
 //! [`sanitize_uri`][sanitize_uri].
 //!
+//! ## HTML
+//!
+//! Definitions in markdown do not, on their own, relate to anything in HTML.
+//! When matched with a [label end (reference)][label_end], they together
+//! relate to the `<a>` or `<img>` elements in HTML.
+//! The definition forms its `href` or `src`, and optionally `title`,
+//! attributes.
+//! See [*§ 4.5.1 The `a` element*][html_a] and
+//! [*§ 4.8.3 The `img` element*][html_img] in the HTML spec for more info.
+//!
 //! ## Tokens
 //!
 //! *   [`Definition`][Name::Definition]
@@ -84,14 +92,14 @@
 //! [string]: crate::construct::string
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
-//! [label_end]: crate::construct::label_end
 //! [destination]: crate::construct::partial_destination
-//! [title]: crate::construct::partial_title
 //! [label]: crate::construct::partial_label
+//! [label_end]: crate::construct::label_end
+//! [title]: crate::construct::partial_title
 //! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri
 //! [normalize_identifier]: crate::util::normalize_identifier
-//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
-//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
+//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
+//! [html_img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
 
 use crate::construct::partial_space_or_tab::space_or_tab;
 use crate::construct::partial_space_or_tab_eol::space_or_tab_eol;
diff --git a/src/construct/document.rs b/src/construct/document.rs
index 9def6c5..0cda368 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -6,7 +6,7 @@
 //! The constructs found in flow are:
 //!
 //! *   [Block quote][crate::construct::block_quote]
-//! *   [List][crate::construct::list_item]
+//! *   [List item][crate::construct::list_item]
 
 use crate::event::{Content, Event, Kind, Link, Name};
 use crate::state::{Name as StateName, State};
@@ -409,7 +409,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
                 }
             }
 
-            debug_assert!(found, "expected to find container token to exit");
+            debug_assert!(found, "expected to find container event to exit");
         }
 
         if let Some(ref mut list) = tokenizer.tokenize_state.document_exits[index] {
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 1fafa0b..64c909a 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -1,28 +1,33 @@
-//! Hard break (escape) is a construct that occurs in the  [text][] content
-//! type.
+//! Hard break (escape) occurs in the [text][] content type.
 //!
-//! They’re formed with the following BNF:
+//! ## Grammar
+//!
+//! Hard break (escape) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
-//! ; Restriction: followed by a line ending  (that is part of the construct
+//! ; Restriction: followed by a line ending (that is part of the content
 //! ; instead of ending it).
 //! hard_break_escape ::= '\\'
 //! ```
 //!
-//! Hard breaks in markdown relate to the HTML element `<br>`.
-//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info.
-//!
 //! It is also possible to create a hard break with a
 //! [hard break (trailing)][hard_break_trailing].
-//! That construct is not recommended because trailing spaces are typically
-//! invisible in editors, or even automatically removed, making them hard to
-//! use.
 //!
-//! It is also possible to escape punctuation characters with a similar
+//! Punctuation characters can be escaped with a similar
 //! construct: a [character escape][character_escape] is a backslash followed
 //! by an ASCII punctuation character.
 //! Arbitrary characters can be escaped with
-//! [character reference][character_reference]s.
+//! [character references][character_reference].
+//!
+//! ## HTML
+//!
+//! Hard breaks in markdown relate to the HTML element `<br>`.
+//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info.
+//!
+//! ## Recommendation
+//!
+//! Always use hard break (escape), never hard break (trailing).
 //!
 //! ## Tokens
 //!
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 3bcff54..960ae32 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -1,17 +1,16 @@
-//! Heading (atx) is a construct that occurs in the [flow] content type.
+//! Heading (atx) occurs in the [flow][] content type.
 //!
-//! They’re formed with the following BNF:
+//! ## Grammar
 //!
-//! ```bnf
-//! heading_atx ::= 1*6'#' [ 1*space_or_tab text [ 1*space_or_tab 1*'#' ] ] *space_or_tab
+//! Heading (atx) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
-//! text ::= code - eol
-//! space_or_tab ::= ' ' | '\t'
+//! ```bnf
+//! heading_atx ::= 1*6'#' [ 1*space_or_tab line [ 1*space_or_tab 1*'#' ] ] *space_or_tab
 //! ```
 //!
-//! Headings in markdown relate to the `<h1>` through `<h6>` elements in HTML.
-//! See [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements* in the
-//! HTML spec][html] for more info.
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
 //!
 //! `CommonMark` introduced the requirement on whitespace existing after the
 //! opening sequence and before text.
@@ -25,16 +24,25 @@
 //! [hard break (escape)][hard_break_escape]).
 //! However, their limit is that they cannot form `<h3>` through `<h6>`
 //! headings.
-//! Due to this limitation, it is recommended to use atx headings.
 //!
 //! > 🏛 **Background**: the word *setext* originates from a small markup
 //! > language by Ian Feldman from 1991.
-//! > See [*§ Setext* on Wikipedia][wiki-setext] for more info.
+//! > See [*§ Setext* on Wikipedia][wiki_setext] for more info.
 //! > The word *atx* originates from a tiny markup language by Aaron Swartz
 //! > from 2002.
 //! > See [*§ atx, the true structured text format* on `aaronsw.com`][atx] for
 //! > more info.
 //!
+//! ## HTML
+//!
+//! Headings in markdown relate to the `<h1>` through `<h6>` elements in HTML.
+//! See [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements* in the
+//! HTML spec][html] for more info.
+//!
+//! ## Recommendation
+//!
+//! Always use heading (atx), never heading (setext).
+//!
 //! ## Tokens
 //!
 //! *   [`HeadingAtx`][Name::HeadingAtx]
@@ -51,7 +59,7 @@
 //! [heading_setext]: crate::construct::heading_setext
 //! [hard_break_escape]: crate::construct::hard_break_escape
 //! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
-//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
+//! [wiki_setext]: https://en.wikipedia.org/wiki/Setext
 //! [atx]: http://www.aaronsw.com/2002/atx/
 
 use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 043104a..bad781c 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -1,20 +1,21 @@
-//! Heading (setext) is a construct that occurs in the [flow] content type.
+//! Heading (setext) occurs in the [flow][] content type.
 //!
-//! They’re formed with the following BNF:
+//! ## Grammar
+//!
+//! Heading (setext) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
-//! heading_setext ::= line *(eol line) eol whitespace_optional (1*'-' | 1*'=') whitespace_optional
+//! heading_setext ::= paragraph eol *space_or_tab (1*'-' | 1*'=') *space_or_tab
 //!
-//! whitespace ::= 1*space_or_tab
-//! whitespace_optional ::= [ whitespace ]
-//! line ::= code - eol
-//! eol ::= '\r' | '\r\n' | '\n'
+//! ; See the `paragraph` construct for the BNF of that part.
 //! ```
 //!
-//! Heading (setext) in markdown relates to the `<h1>` and `<h2>` elements in
-//! HTML.
-//! See [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements* in the
-//! HTML spec][html] for more info.
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
+//!
+//! See [`paragraph`][paragraph] for grammar, notes, and recommendations on
+//! that part.
 //!
 //! In markdown, it is also possible to create headings with a
 //! [heading (atx)][heading_atx] construct.
@@ -23,7 +24,6 @@
 //! [hard break (escape)][hard_break_escape]).
 //! However, their limit is that they cannot form `<h3>` through `<h6>`
 //! headings.
-//! Due to this limitation, it is recommended to use atx headings.
 //!
 //! [Thematic breaks][thematic_break] formed with dashes and without whitespace
 //! could be interpreted as a heading (setext).
@@ -32,12 +32,23 @@
 //!
 //! > 🏛 **Background**: the word *setext* originates from a small markup
 //! > language by Ian Feldman from 1991.
-//! > See [*§ Setext* on Wikipedia][wiki-setext] for more info.
+//! > See [*§ Setext* on Wikipedia][wiki_setext] for more info.
 //! > The word *atx* originates from a tiny markup language by Aaron Swartz
 //! > from 2002.
 //! > See [*§ atx, the true structured text format* on `aaronsw.com`][atx] for
 //! > more info.
 //!
+//! ## HTML
+//!
+//! Heading (setext) in markdown relates to the `<h1>` and `<h2>` elements in
+//! HTML.
+//! See [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements* in the
+//! HTML spec][html] for more info.
+//!
+//! ## Recommendation
+//!
+//! Always use heading (atx), never heading (setext).
+//!
 //! ## Tokens
 //!
 //! *   [`HeadingSetext`][Name::HeadingSetext]
@@ -50,11 +61,12 @@
 //! *   [*§ 4.3 Setext headings* in `CommonMark`](https://spec.commonmark.org/0.30/#setext-headings)
 //!
 //! [flow]: crate::construct::flow
+//! [paragraph]: crate::construct::paragraph
 //! [heading_atx]: crate::construct::heading_atx
 //! [thematic_break]: crate::construct::thematic_break
 //! [hard_break_escape]: crate::construct::hard_break_escape
 //! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
-//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
+//! [wiki_setext]: https://en.wikipedia.org/wiki/Setext
 //! [atx]: http://www.aaronsw.com/2002/atx/
 
 use crate::constant::TAB_SIZE;
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 38e33f8..bd41aa9 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -1,38 +1,38 @@
-//! HTML (flow) is a construct that occurs in the [flow][] content type.
+//! HTML (flow) occurs in the [flow][] content type.
 //!
-//! It forms with the following BNF:
+//! ## Grammar
+//!
+//! HTML (flow) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
 //! html_flow ::= raw | comment | instruction | declaration | cdata | basic | complete
 //!
-//! ; Note: closing tag name need to match opening tag name.
-//! raw ::= '<' raw_tag_name [ [ ( whitespace | '>' ) *line ] *( eol *line ) ] [ '</' raw_tag_name *line ]
-//! comment ::= '<!--' [ *'-' '>' *line | *line *( eol *line ) [ '-->' *line ] ]
-//! instruction ::= '<?' [ '>' *line | *line *( eol *line ) [ '?>' *line ] ]
-//! declaration ::= '<!' ascii_alphabetic *line *( eol *line ) [ '>' *line ]
-//! cdata ::= '<![CDATA[' *line *( eol *line ) [ ']]>' *line ]
-//! basic ::= '< [ '/' ] basic_tag_name [ [ '/' ] '>' *line *( eol 1*line ) ]
-//! complete ::= ( opening_tag | closing_tag ) ( whitespace_optional *( eol 1*line ) | whitespace_optional )
+//! ; Note: closing tag name does not need to match opening tag name.
+//! raw ::= '<' raw_tag_name [[space_or_tab *line | '>' *line] eol] *(*line eol) ['</' raw_tag_name *line]
+//! comment ::= '<!--' [*'-' '>' *line | *line *(eol *line) ['-->' *line]]
+//! instruction ::= '<?' ['>' *line | *line *(eol *line) ['?>' *line]]
+//! declaration ::= '<!' ascii_alphabetic *line *(eol *line) ['>' *line]
+//! cdata ::= '<![CDATA[' *line *(eol *line) [']]>' *line]
+//! basic ::= '<' ['/'] basic_tag_name [['/'] '>' *line *(eol 1*line)]
+//! complete ::= (opening_tag | closing_tag) [*space_or_tab *(eol 1*line)]
 //!
 //! raw_tag_name ::= 'pre' | 'script' | 'style' | 'textarea' ; Note: case-insensitive.
 //! basic_tag_name ::= 'address' | 'article' | 'aside' | ... ; See `constants.rs`, and note: case-insensitive.
-//! opening_tag ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>'
-//! closing_tag ::= '</' tag_name whitespace_optional '>'
-//! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric )
-//! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ]
-//! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric )
-//! attribute_value ::= '"' *( line - '"' ) '"' | "'" *( line - "'" )  "'" | 1*( line - space_or_tab - '"' - "'" - '/' - '<' - '=' - '>' - '`')
-//!
-//! whitespace ::= 1*space_or_tab
-//! whitespace_optional ::= [ whitespace ]
-//! line ::= code - eol
-//! eol ::= '\r' | '\r\n' | '\n'
-//! space_or_tab ::= ' ' | '\t'
+//! opening_tag ::= '<' tag_name *(space_or_tab_eol attribute) [[space_or_tab_eol] '/'] [space_or_tab_eol] '>'
+//! closing_tag ::= '</' tag_name [space_or_tab_eol] '>'
+//! tag_name ::= ascii_alphabetic *('-' | ascii_alphanumeric)
+//! attribute ::= attribute_name [[space_or_tab_eol] '=' [space_or_tab_eol] attribute_value]
+//! attribute_name ::= (':' | '_' | ascii_alphabetic) *('-' | '.' | ':' | '_' | ascii_alphanumeric)
+//! attribute_value ::= '"' *(line - '"') '"' | "'" *(line - "'")  "'" | 1*(text - '"' - "'" - '/' - '<' - '=' - '>' - '`')
 //! ```
 //!
+//! As this construct occurs in flow, like all flow constructs, it must be
+//! followed by an eol (line ending) or eof (end of file).
+//!
 //! The grammar for HTML in markdown does not resemble the rules of parsing
 //! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML
-//! spec][html-parsing].
+//! spec][html_parsing].
 //! As such, HTML in markdown *resembles* HTML, but is instead a (naïve?)
 //! attempt to parse an XML-like language.
 //! By extension, another notable property of the grammar is that it can
@@ -96,7 +96,7 @@
 //! [paragraph]: crate::construct::paragraph
 //! [html_raw_names]: crate::constant::HTML_RAW_NAMES
 //! [html_block_names]: crate::constant::HTML_BLOCK_NAMES
-//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
+//! [html_parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
 use crate::constant::{
     HTML_BLOCK_NAMES, HTML_CDATA_PREFIX, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE,
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index fde0847..26eded9 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -1,34 +1,31 @@
-//! HTML (text) is a construct that occurs in the [text][] content type.
+//! HTML (text) occurs in the [text][] content type.
 //!
-//! It forms with the following BNF:
+//! ## Grammar
+//!
+//! HTML (text) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
 //!
 //! ```bnf
 //! html_text ::= comment | instruction | declaration | cdata | tag_close | tag_open
 //!
 //! ; Restriction: the text is not allowed to start with `>`, `->`, or to contain `--`.
-//! comment ::= '<!--' *code '-->'
-//! instruction ::= '<?' *code '?>'
-//! declaration ::= '<!' ascii_alphabetic *code '>'
+//! comment ::= '<!--' *byte '-->'
+//! instruction ::= '<?' *byte '?>'
+//! declaration ::= '<!' ascii_alphabetic *byte '>'
 //! ; Restriction: the text is not allowed to contain `]]`.
-//! cdata ::= '<![CDATA[' *code ']]>'
-//! tag_close ::= '</' tag_name whitespace_optional '>'
-//! opening_tag ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>'
+//! cdata ::= '<![CDATA[' *byte ']]>'
+//! tag_close ::= '</' tag_name [space_or_tab_eol] '>'
+//! tag_open ::= '<' tag_name *(space_or_tab_eol attribute) [[space_or_tab_eol] '/'] [space_or_tab_eol] '>'
 //!
 //! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric )
-//! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ]
-//! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric )
-//! attribute_value ::= '"' *( code - '"' ) '"' | "'" *( code - "'" )  "'" | 1*( code - space_or_tab - eol - '"' - "'" - '/' - '<' - '=' - '>' - '`')
-//!
-//! ; Note: blank lines can never occur in `text`.
-//! whitespace ::= 1*space_or_tab | [ *space_or_tab eol *space_or_tab ]
-//! whitespace_optional ::= [ whitespace ]
-//! eol ::= '\r' | '\r\n' | '\n'
-//! space_or_tab ::= ' ' | '\t'
+//! attribute ::= attribute_name [[space_or_tab_eol] '=' [space_or_tab_eol] attribute_value]
+//! attribute_name ::= (':' | '_' | ascii_alphabetic) *('-' | '.' | ':' | '_' | ascii_alphanumeric)
+//! attribute_value ::= '"' *(byte - '"') '"' | "'" *(byte - "'")  "'" | 1*(text - '"' - "'" - '/' - '<' - '=' - '>' - '`')
 //! ```
 //!
 //! The grammar for HTML in markdown does not resemble the rules of parsing
 //! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML
-//! spec][html-parsing].
+//! spec][html_parsing].
 //! See the related flow construct [HTML (flow)][html_flow] for more info.
 //!
 //! Because the **tag open** and **tag close** productions in the grammar form
@@ -52,7 +49,7 @@
 //!
 //! [text]: crate::construct::text
 //! [html_flow]: crate::construct::html_flow
-//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
+//! [html_parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
 use crate::constant::HTML_CDATA_PREFIX;
 use crate::construct::partial_space_or_tab::space_or_tab;
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 09716b7..4752639 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -1,4 +1,4 @@
-//! Label end is a construct that occurs in the [text][] conten&t type.
+//! Label end is a construct that occurs in the [text][] content type.
 //!
 //! It forms with the following BNF:
 //!
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index a70906a..09678dd 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -289,7 +289,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
         container.size = prefix;
 
         tokenizer.exit(Name::ListItemPrefix);
-        tokenizer.register_resolver_before(ResolveName::List);
+        tokenizer.register_resolver_before(ResolveName::ListItem);
         State::Ok
     }
 }
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 49868e9..da2f5e8 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -1,17 +1,33 @@
 //! Constructs found in markdown.
 //!
-//! There are several *things* found when parsing markdown, such as, say, a
-//! thematic break.
-//! These things are called constructs here.
-//! Sometimes, there are several constructs that result in an equivalent thing.
-//! For example, [code (fenced)][code_fenced] and
-//! [code (indented)][code_indented] are considered different constructs
+//! Constructs are grouped by content type.
+//! Which content type is allowed somewhere, defines which constructs are
+//! allowed there.
+//!
+//! ## Content type
+//!
+//! The following content types are found in markdown:
+//!
+//! *   [document][]
+//! *   [flow][]
+//! *   [string][]
+//! *   [text][]
 //!
 //! Content types also have a *rest* thing: after all things are parsed,
 //! there’s something left.
+//! In document, that is [flow][].
 //! In flow, that is a [paragraph][].
 //! In string and text, that is [data][partial_data].
 //!
+//! ## Construct
+//!
+//! There are several *things* found when parsing markdown, such as, say, a
+//! thematic break.
+//! These things are called constructs here.
+//! Sometimes, there are several constructs that result in an equivalent thing.
+//! For example, [code (fenced)][code_fenced] and
+//! [code (indented)][code_indented] are considered different constructs.
+//!
 //! The following constructs are found in markdown:
 //!
 //! *   [attention (strong, emphasis)][attention]
@@ -39,7 +55,7 @@
 //! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
 //! > [whitespace][partial_whitespace].
 //!
-//! There are also several routines used in different places:
+//! There are also several small subroutines typically used in different places:
 //!
 //! *   [bom][partial_bom]
 //! *   [data][partial_data]
@@ -51,20 +67,60 @@
 //! *   [title][partial_title]
 //! *   [whitespace][partial_whitespace]
 //!
+//! ## Grammar
+//!
 //! Each construct maintained here is explained with a BNF diagram.
+//!
+//! Such diagrams are considered to be *non-normative*.
+//! That is to say, they form illustrative, imperfect, but useful, examples.
+//! The code, in Rust, is considered to be normative.
+//!
 //! For example, the docs for [character escape][character_escape] contain:
 //!
 //! ```bnf
 //! character_escape ::= '\\' ascii_punctuation
 //! ```
 //!
-//! Such diagrams are considered to be *non-normative*.
-//! That is to say, they form illustrative, imperfect, but useful, examples.
-//! The code, in Rust, is considered to be normative.
+//! These diagrams contain references to character groups as defined by Rust
+//! on, for example, [char][], but also often on [u8][], which is what
+//! `micromark-rs` typically works on.
+//! So, for example, `ascii_punctuation` refers to
+//! [`u8::is_ascii_punctuation`][u8::is_ascii_punctuation].
 //!
-//! They also contain references to character as defined by [char][], so for
-//! example `ascii_punctuation` refers to
-//! [`char::is_ascii_punctuation`][char::is_ascii_punctuation].
+//! For clarity, the productions used throughout are:
+//!
+//! ```bnf
+//! ; Rust / ASCII groups:
+//! ; 'a'..='z'
+//! ascii_lowercase ::= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z'
+//! ; 'A'..='Z'
+//! ascii_uppercase ::= 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
+//! ; 'A'..='Z', 'a'..='z'
+//! ascii_alphabetic ::= ascii_lowercase | ascii_uppercase
+//! ; '0'..='9'
+//! ascii_digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
+//! ; '0'..='9'; 'A'..='F', 'a'..='f'
+//! ascii_hexdigit ::= ascii_digit | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F'
+//! ; '0'..='9'; 'A'..='Z', 'a'..='z'
+//! ascii_alphanumeric ::= ascii_digit | ascii_alphabetic
+//! ; '!'..='/'; ':'..='@'; '['..='`'; '{'..='~'
+//! ascii_punctuation ::= '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~'
+//! ; 0x00..=0x1F; 0x7F
+//! ascii_control ::= 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D | 0x0E | 0x0F | 0x10 | 0x11 | 0x12 | 0x13 | 0x14 | 0x15 | 0x16 | 0x17 | 0x18 | 0x19 | 0x1A | 0x1B | 0x1C | 0x1D | 0x1E | 0x1F | 0x7F
+//!
+//! ; Markdown groups:
+//! ; Any byte (u8)
+//! byte ::= 0x00..=0xFF
+//! space_or_tab ::= '\t' | ' '
+//! eol ::= '\n' | '\r' | '\r\n'
+//! line ::= byte - eol
+//! text ::= line - space_or_tab
+//! space_or_tab_eol ::= 1*space_or_tab | 0*space_or_tab eol 0*space_or_tab
+//!
+//! ; Unicode groups:
+//! unicode_whitespace ::= ? ; See `char::is_whitespace`.
+//! unicode_punctuation ::= ? ; See `src/unicode.rs`.
+//! ```
 
 pub mod attention;
 pub mod autolink;
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 3ffa646..adbfae1 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -1,6 +1,6 @@
 //! Data occurs in [text][] and [string][].
 //!
-//! It can include anything (including line endings), and stops at certain
+//! It can include anything (except for line endings) and stops at certain
 //! characters.
 //!
 //! [string]: crate::construct::string
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index bf06df9..04016cb 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -71,7 +71,7 @@ pub fn resolve_whitespace(tokenizer: &mut Tokenizer, hard_break: bool, trim_whol
     }
 }
 
-/// Trim a [`Data`][Name::Data] token.
+/// Trim a [`Data`][Name::Data] event.
 fn trim_data(
     tokenizer: &mut Tokenizer,
     exit_index: usize,
@@ -109,7 +109,7 @@ fn trim_data(
         };
 
         // The whole data is whitespace.
-        // We can be very fast: we only change the token types.
+        // We can be very fast: we only change the event names.
         if index == 0 {
             tokenizer.events[exit_index - 1].name = name.clone();
             tokenizer.events[exit_index].name = name;
@@ -157,7 +157,7 @@ fn trim_data(
         }
 
         // The whole data is whitespace.
-        // We can be very fast: we only change the token types.
+        // We can be very fast: we only change the event names.
         if index == slice.bytes.len() {
             tokenizer.events[exit_index - 1].name = Name::SpaceOrTab;
             tokenizer.events[exit_index].name = Name::SpaceOrTab;
diff --git a/src/event.rs b/src/event.rs
index 7f81571..8058d64 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -1,3 +1,5 @@
+//! Semantic labels of things happening.
+
 /// Semantic label of a span.
 #[derive(Clone, Debug, Eq, Hash, PartialEq)]
 pub enum Name {
@@ -1832,7 +1834,7 @@ pub enum Name {
     ThematicBreakSequence,
 }
 
-/// List of void tokens, used to make sure everything is working well.
+/// List of void events, used to make sure everything is working well.
 pub const VOID_EVENTS: [Name; 41] = [
     Name::AttentionSequence,
     Name::AutolinkEmail,
@@ -1891,21 +1893,25 @@ pub enum Content {
 /// Link to another event.
 #[derive(Clone, Debug)]
 pub struct Link {
+    /// Previous event.
     pub previous: Option<usize>,
+    /// Next event.
     pub next: Option<usize>,
+    /// Content type.
     pub content: Content,
 }
 
 /// Place in the document.
 ///
-/// The interface for the location in the document comes from unist `Point`:
-/// <https://github.com/syntax-tree/unist#point>.
+/// The interface for the location in the document comes from unist
+/// [`Point`](https://github.com/syntax-tree/unist#point).
 #[derive(Clone, Debug)]
 pub struct Point {
     /// 1-indexed line number.
     pub line: usize,
     /// 1-indexed column number.
-    /// This is increases up to a tab stop for tabs.
+    ///
+    /// This is increased up to a tab stop for tabs.
     /// Some editors count tabs as 1 character, so this position is not the
     /// same as editors.
     pub column: usize,
diff --git a/src/parser.rs b/src/parser.rs
index cc93021..8b13d45 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,4 +1,4 @@
-//! Turn a string of markdown into events.
+//! Turn bytes of markdown into events.
 
 use crate::event::{Event, Point};
 use crate::state::{Name as StateName, State};
diff --git a/src/resolve.rs b/src/resolve.rs
index 1106880..a62d382 100644
--- a/src/resolve.rs
+++ b/src/resolve.rs
@@ -1,17 +1,57 @@
+//! Resolve events.
+
 use crate::construct;
 use crate::tokenizer::Tokenizer;
 
-/// Names of functions that resolve.
+/// Names of resolvers.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum Name {
+    /// Resolve labels.
+    ///
+    /// Labels are parsed as starts and ends, and when they match, merged
+    /// together to form media (links and images), and otherwise turned into
+    /// data.
     Label,
+    /// Resolve attention.
+    ///
+    /// Attention sequences are parsed and finally matched together to form
+    /// attention (emphasis and strong) based on which characters they contain,
+    /// and what occurs before and after each sequence.
+    /// Otherwise they are turned into data.
     Attention,
+    /// Resolve heading (atx).
+    ///
+    /// Heading (atx) contains further sequences and data.
+    /// At the end, a final sequence is kept that way, while the rest is merged
+    /// with the data.
     HeadingAtx,
+    /// Resolve heading (setext).
+    ///
+    /// Heading (setext) is parsed as an underline that is preceded by a
+    /// paragraph; together, both form the whole construct.
     HeadingSetext,
-    List,
+    /// Resolve list item.
+    ///
+    /// List items are parsed on their own.
+    /// They are wrapped into ordered or unordered lists based on whether items
+    /// with the same marker occur next to each other.
+    ListItem,
+    /// Resolve paragraphs.
+    ///
+    /// Paragraphs are parsed as single line paragraphs, as what remains if
+    /// other flow constructs don’t match.
+    /// But, when they occur next to each other, they need to be merged.
     Paragraph,
+    /// Resolve data.
+    ///
+    /// Data is parsed as many small bits, due to many punctuation characters
+    /// potentially starting something, particularly in text content.
+    /// It helps performance to merge them together if those markers did not
+    /// match anything and hence they occur next to each other.
     Data,
+    /// Resolve whitespace in `string`.
     String,
+    /// Resolve whitespace in `text`.
     Text,
 }
 
@@ -22,7 +62,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) {
         Name::Attention => construct::attention::resolve,
         Name::HeadingAtx => construct::heading_atx::resolve,
         Name::HeadingSetext => construct::heading_setext::resolve,
-        Name::List => construct::list_item::resolve,
+        Name::ListItem => construct::list_item::resolve,
         Name::Paragraph => construct::paragraph::resolve,
         Name::Data => construct::partial_data::resolve,
         Name::String => construct::string::resolve,
diff --git a/src/state.rs b/src/state.rs
index aae153f..f9cc39a 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -1,7 +1,9 @@
+//! States of the state machine.
+
 use crate::construct;
 use crate::tokenizer::Tokenizer;
 
-/// The result of a state.
+/// Result of a state.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum State {
     /// Move to [`Name`][] next.
@@ -14,7 +16,7 @@ pub enum State {
     Nok,
 }
 
-/// Names of functions to move to.
+/// Names of states to move to.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 #[allow(clippy::enum_variant_names)]
 pub enum Name {
@@ -296,7 +298,7 @@ pub enum Name {
 }
 
 #[allow(clippy::too_many_lines)]
-/// Call the corresponding function for a state name.
+/// Call the corresponding state for a state name.
 pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
     let func = match name {
         Name::AttentionStart => construct::attention::start,
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index f55c790..a031e35 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -1,27 +1,23 @@
 //! Deal with content in other content.
 //!
 //! To deal with content in content, *you* (a `micromark-rs` contributor) add
-//! information on events.
-//! Events are a flat list, but they can be connected to each other by setting
-//! `previous` and `next` links.
-//! These links:
-//!
-//! *   …must occur on [`Enter`][Kind::Enter] events only
-//! *   …must occur on void events (they are followed by their corresponding
-//!     [`Exit`][Kind::Exit] event)
-//! *   …must have `link` field
+//! info on events.
+//! Events are a flat list, but they can be connected to each other with a
+//! [`Link`][crate::event::Link].
+//! Links must occur on [`Enter`][Kind::Enter] events only, which are void
+//! (they are followed by their corresponding [`Exit`][Kind::Exit] event).
 //!
 //! Links will then be passed through a tokenizer for the corresponding content
 //! type by `subtokenize`.
-//! The subevents they result in are split up into slots for each linked token
+//! The subevents they result in are split up into slots for each linked event
 //! and replace those links.
 //!
-//! Subevents are not immediately subtokenized again because markdown prevents
-//! us from doing so due to definitions, which can occur after references, and
-//! thus the whole document needs to be parsed up to the level of definitions,
-//! before any level that can include references can be parsed.
+//! Subevents are not immediately subtokenized as markdown prevents us from
+//! doing so due to definitions, which can occur after references, and thus the
+//! whole document needs to be parsed up to the level of definitions, before
+//! any level that can include references can be parsed.
 
-use crate::event::{Content, Event, Kind};
+use crate::event::{Content, Event, Kind, VOID_EVENTS};
 use crate::parser::ParseState;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
@@ -30,31 +26,42 @@ use crate::util::edit_map::EditMap;
 /// Link two [`Event`][]s.
 ///
 /// Arbitrary (void) events can be linked together.
-/// This optimizes for the common case where the token at `index` is connected
-/// to the previous void token.
+/// This optimizes for the common case where the event at `index` is connected
+/// to the previous void event.
 pub fn link(events: &mut [Event], index: usize) {
     link_to(events, index - 2, index);
 }
 
 /// Link two arbitrary [`Event`][]s together.
-pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
-    debug_assert_eq!(events[pevious].kind, Kind::Enter);
-    debug_assert_eq!(events[pevious + 1].kind, Kind::Exit);
-    debug_assert_eq!(events[pevious + 1].name, events[pevious].name);
+pub fn link_to(events: &mut [Event], previous: usize, next: usize) {
+    debug_assert_eq!(events[previous].kind, Kind::Enter);
+    debug_assert!(
+        VOID_EVENTS.iter().any(|d| d == &events[previous].name),
+        "expected `{:?}` to be void",
+        events[previous].name
+    );
+    debug_assert_eq!(events[previous + 1].kind, Kind::Exit);
+    debug_assert_eq!(events[previous].name, events[previous + 1].name);
     debug_assert_eq!(events[next].kind, Kind::Enter);
+    debug_assert!(
+        VOID_EVENTS.iter().any(|d| d == &events[next].name),
+        "expected `{:?}` to be void",
+        events[next].name
+    );
     // Note: the exit of this event may not exist, so don’t check for that.
 
-    let link_previous = events[pevious]
+    let link_previous = events[previous]
         .link
         .as_mut()
         .expect("expected `link` on previous");
     link_previous.next = Some(next);
     let link_next = events[next].link.as_mut().expect("expected `link` on next");
-    link_next.previous = Some(pevious);
+    link_next.previous = Some(previous);
 
     debug_assert_eq!(
-        events[pevious].link.as_ref().unwrap().content,
-        events[next].link.as_ref().unwrap().content
+        events[previous].link.as_ref().unwrap().content,
+        events[next].link.as_ref().unwrap().content,
+        "expected `content` to match"
     );
 }
 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d66e8f6..7eba194 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,15 +1,12 @@
-//! The tokenizer glues states from the state machine together.
+//! A tokenizer glues states from the state machine together.
 //!
-//! It facilitates everything needed to turn codes into tokens and  with
-//! a state machine.
-//! It also enables logic needed for parsing markdown, such as an [`attempt`][]
-//! to parse something, which can succeed or, when unsuccessful, revert the
-//! attempt.
-//! Similarly, a [`check`][] exists, which does the same as an `attempt` but
-//! reverts even if successful.
+//! It facilitates everything needed to turn bytes into events with a state
+//! machine.
+//! It also enables the logic needed for parsing markdown, such as an
+//! [`attempt`][] to try and parse something, which can succeed or, when
+//! unsuccessful, revert the attempt.
 //!
 //! [`attempt`]: Tokenizer::attempt
-//! [`check`]: Tokenizer::check
 
 use crate::constant::TAB_SIZE;
 use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS};
@@ -18,17 +15,31 @@ use crate::resolve::{call as call_resolve, Name as ResolveName};
 use crate::state::{call, State};
 use crate::util::edit_map::EditMap;
 
-/// Info used to tokenize the current container.
+/// Containers.
 ///
-/// This info is shared between the initial construct and its continuation.
-/// It’s only used for list items.
+/// Containers are found when tokenizing
+/// [document content][crate::construct::document].
+/// They parse a portion at the start of one or more lines.
+/// The rest of those lines is a different content type (specifically, flow),
+/// which they “contain”.
+#[derive(Debug, Eq, PartialEq)]
+pub enum Container {
+    /// [Block quote][crate::construct::block_quote].
+    BlockQuote,
+    /// [List item][crate::construct::list_item].
+    ListItem,
+}
+
+/// Info used to tokenize a container.
+///
+/// Practically, these fields are only used for list items.
 #[derive(Debug)]
 pub struct ContainerState {
     /// Kind.
     pub kind: Container,
     /// Whether the first line was blank.
     pub blank_initial: bool,
-    /// The size of the initial construct.
+    /// Size.
     pub size: usize,
 }
 
@@ -39,26 +50,19 @@ enum ByteAction {
     ///
     /// Includes replaced bytes.
     Normal(u8),
-    /// This is a new byte.
-    Insert(u8),
     /// This byte must be ignored.
     Ignore,
+    /// This is a new byte.
+    Insert(u8),
 }
 
-/// Supported containers.
-#[derive(Debug, PartialEq)]
-pub enum Container {
-    BlockQuote,
-    ListItem,
-}
-
-/// Loose label starts we found.
+/// Label start, looking for an end.
 #[derive(Debug)]
 pub struct LabelStart {
     /// Indices of where the label starts and ends in `events`.
     pub start: (usize, usize),
-    /// A boolean used internally to figure out if a (link) label start link
-    /// can’t be used anymore (because it would contain another link).
+    /// A boolean used internally to figure out if a (link) label start can’t
+    /// be used anymore (because it would contain another link).
     /// That link start is still looking for a balanced closing bracket though,
     /// so we can’t remove it just yet.
     pub inactive: bool,
@@ -99,9 +103,10 @@ struct Attempt {
     progress: Option<Progress>,
 }
 
-/// The internal state of a tokenizer, not to be confused with states from the
-/// state machine, this instead is all the information about where we currently
-/// are and what’s going on.
+/// The internal state of a tokenizer.
+///
+/// Not to be confused with states from the state machine, this instead is all
+/// the information on where we currently are and what’s going on.
 #[derive(Clone, Debug)]
 struct Progress {
     /// Length of `events`.
@@ -168,7 +173,7 @@ pub struct TokenizeState<'a> {
     /// List of defined identifiers.
     pub definitions: Vec<String>,
 
-    /// Whether to connect tokens.
+    /// Whether to connect events.
     pub connect: bool,
     /// Marker.
     pub marker: u8,
@@ -188,15 +193,15 @@ pub struct TokenizeState<'a> {
     pub start: usize,
     /// Index.
     pub end: usize,
-    /// Slot for a token type.
+    /// Slot for an event name.
     pub token_1: Name,
-    /// Slot for a token type.
+    /// Slot for an event name.
     pub token_2: Name,
-    /// Slot for a token type.
+    /// Slot for an event name.
     pub token_3: Name,
-    /// Slot for a token type.
+    /// Slot for an event name.
     pub token_4: Name,
-    /// Slot for a token type.
+    /// Slot for an event name.
     pub token_5: Name,
 }
 
@@ -433,28 +438,25 @@ impl<'a> Tokenizer<'a> {
 
     /// Mark the end of a semantic label.
     pub fn exit(&mut self, name: Name) {
-        let current_token = self.stack.pop().expect("cannot close w/o open tokens");
+        let current = self.stack.pop().expect("cannot close w/o open tokens");
 
-        debug_assert_eq!(
-            current_token, name,
-            "expected exit token to match current token"
-        );
+        debug_assert_eq!(current, name, "expected exit event to match current event");
 
         let previous = self.events.last().expect("cannot close w/o open event");
         let mut point = self.point.clone();
 
         debug_assert!(
-            current_token != previous.name
+            current != previous.name
                 || previous.point.index != point.index
                 || previous.point.vs != point.vs,
-            "expected non-empty token"
+            "expected non-empty event"
         );
 
         if VOID_EVENTS.iter().any(|d| d == &name) {
             debug_assert!(
-                current_token == previous.name,
-                "expected token to be void (`{:?}`), instead of including `{:?}`",
-                current_token,
+                current == previous.name,
+                "expected event to be void (`{:?}`), instead of including `{:?}`",
+                current,
                 previous.name
             );
         }
diff --git a/src/unicode.rs b/src/unicode.rs
index 764d4c7..2b79a88 100644
--- a/src/unicode.rs
+++ b/src/unicode.rs
@@ -1,6 +1,6 @@
-//! Information on Unicode.
+//! Info on Unicode.
 
-/// List of characters that are considered punctuation according to Unicode.
+/// List of characters that are considered punctuation.
 ///
 /// > 👉 **Important**: this module is generated by `build.rs`.
 /// > It is generate from the latest Unicode data.
diff --git a/src/util/decode_character_reference.rs b/src/util/decode_character_reference.rs
index f8fd18f..8ed32f4 100644
--- a/src/util/decode_character_reference.rs
+++ b/src/util/decode_character_reference.rs
@@ -1,4 +1,4 @@
-//! Utilities to decode character references.
+//! Decode character references.
 
 use crate::constant::CHARACTER_REFERENCES;
 
@@ -43,11 +43,11 @@ pub fn decode_named(value: &str) -> String {
 /// Decode numeric character references.
 ///
 /// Turn the number (in string form as either hexadecimal or decimal) coming
-/// from a numeric character reference into a character.
-/// Whether the base of the string form is `10` (decimal) or `16` (hexadecimal)
-/// must be passed as the `radix` parameter.
+/// from a numeric character reference into a string.
+/// The base of the string form must be passed as the `radix` parameter, as
+/// `10` (decimal) or `16` (hexadecimal).
 ///
-/// This returns the `char` associated with that number or a replacement
+/// This returns a `String` form of the associated character or a replacement
 /// character for C0 control characters (except for ASCII whitespace), C1
 /// control characters, lone surrogates, noncharacters, and out of range
 /// characters.
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index 11ac486..33c5706 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -1,6 +1,6 @@
-//! Helpers to deal with several changes in events, batching them together.
+//! Deal with several changes in events, batching them together.
 //!
-//! Preferably, changes should be kept to a minumum.
+//! Preferably, changes should be kept to a minimum.
 //! Sometimes, it’s needed to change the list of events, because parsing can be
 //! messy, and it helps to expose a cleaner interface of events to the compiler
 //! and other users.
diff --git a/src/util/encode.rs b/src/util/encode.rs
index d37a2de..6530011 100644
--- a/src/util/encode.rs
+++ b/src/util/encode.rs
@@ -1,10 +1,11 @@
-//! Utilities to encode HTML.
+//! Encode HTML.
 
 /// Encode dangerous html characters.
 ///
 /// This ensures that certain characters which have special meaning in HTML are
 /// dealt with.
-/// Technically, we can skip `>` and `"` in many cases, but CM includes them.
+/// Technically, we can skip `>` and `"` in many cases, but `CommonMark`
+/// includes them.
 ///
 /// This behavior is not explained in prose in `CommonMark` but can be inferred
 /// from the input/output test cases.
diff --git a/src/util/mod.rs b/src/util/mod.rs
index a01f31e..f51845c 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,4 +1,4 @@
-//! Utilities used when compiling markdown.
+//! Utilities used when processing markdown.
 
 pub mod decode_character_reference;
 pub mod edit_map;
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index f5b12d0..ddc51f8 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -1,17 +1,25 @@
-//! Utility to normalize identifiers.
+//! Normalize identifiers.
 
 /// Normalize an identifier, as found in [references][label_end] and
 /// [definitions][definition], so it can be compared when matching.
 ///
 /// This collapsed whitespace found in markdown (`\t`, `\r`, `\n`, and ` `)
-/// into one space, trims it (as in, dropping the first and last space),
-/// and then performs unicode case folding twice: first by uppercasing
-/// lowercase characters, and then lowercasing uppercase characters.
+/// into one space, trims it (as in, dropping the first and last space), and
+/// then performs unicode case folding twice: first by lowercasing uppercase
+/// characters, and then uppercasing lowercase characters.
 ///
 /// Some characters are considered “uppercase”, such as U+03F4 (`ϴ`), but if
 /// their lowercase counterpart (U+03B8 (`θ`)) is uppercased will result in a
 /// different uppercase character (U+0398 (`Θ`)).
-/// Hence, to get that form, we perform both upper- and lowercase.
+/// Hence, to get that form, we perform both lower- and uppercase.
+///
+/// Performing these steps in that order works, but the inverse does not work.
+/// To illustrate, say the source markdown contains two identifiers
+/// `SS` (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to
+/// `ss` (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both
+/// uppercase to `SS` (U+0053 U+0053).
+/// If we’d inverse the steps, for `ẞ`, we’d first uppercase without a
+/// change, and then lowercase to `ß`, which would not match `ss`.
 ///
 /// ## Examples
 ///
@@ -64,17 +72,5 @@ pub fn normalize_identifier(value: &str) -> String {
         result.push_str(&value[start..]);
     }
 
-    // Some characters are considered “uppercase”, but if their lowercase
-    // counterpart is uppercased will result in a different uppercase
-    // character.
-    // Hence, to get that form, we perform both lower- and uppercase.
-    // Performing these steps in that order works, but the inverse does not
-    // work.
-    // To illustrate, say the source markdown containes two identifiers `SS`
-    // (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to `ss`
-    // (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both uppercase
-    // to `SS` (U+0053 U+0053).
-    // If we’d inverse the steps, for `ẞ`, we’d first uppercase without a
-    // change, and then lowercase to `ß`, which would not match `ss`.
     result.to_lowercase().to_uppercase()
 }
diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs
index 051e1e1..593a70e 100644
--- a/src/util/sanitize_uri.rs
+++ b/src/util/sanitize_uri.rs
@@ -1,4 +1,4 @@
-//! Utilities to make urls safe.
+//! Make urls safe.
 
 use crate::util::encode::encode;
 
@@ -60,9 +60,10 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
     value
 }
 
-/// Normalize a URL (such as used in definitions).
+/// Normalize a URL (such as used in [definitions][definition],
+/// [references][label_end]).
 ///
-/// Encode unsafe characters with percent-encoding, skipping already encoded
+/// It encodes unsafe characters with percent-encoding, skipping already encoded
 /// sequences.
 ///
 /// ## Examples
@@ -77,6 +78,9 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
 /// ## References
 ///
 /// *   [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
+///
+/// [definition]: crate::construct::definition
+/// [label_end]: crate::construct::label_end
 fn normalize_uri(value: &str) -> String {
     let chars = value.chars().collect::<Vec<_>>();
     // Note: it’ll grow bigger for each non-ascii or non-safe character.
diff --git a/src/util/skip.rs b/src/util/skip.rs
index 46cbb4a..a7de408 100644
--- a/src/util/skip.rs
+++ b/src/util/skip.rs
@@ -1,4 +1,4 @@
-//! Utilities to deal with lists of events.
+//! Move across lists of events.
 
 use crate::event::{Event, Kind, Name};
 
diff --git a/src/util/slice.rs b/src/util/slice.rs
index e70078a..be2a381 100644
--- a/src/util/slice.rs
+++ b/src/util/slice.rs
@@ -1,4 +1,4 @@
-//! Utilities to deal with characters.
+//! Deal with bytes.
 
 use crate::constant::TAB_SIZE;
 use crate::event::{Event, Kind, Point};
@@ -7,7 +7,9 @@ use std::str;
 /// A range between two points.
 #[derive(Debug)]
 pub struct Position<'a> {
+    /// Start point.
     pub start: &'a Point,
+    /// End point.
     pub end: &'a Point,
 }
 
@@ -55,11 +57,14 @@ impl<'a> Position<'a> {
 
 /// Bytes belonging to a range.
 ///
-/// Includes information on virtual spaces before and after the bytes.
+/// Includes info on virtual spaces before and after the bytes.
 #[derive(Debug)]
 pub struct Slice<'a> {
+    /// Bytes.
     pub bytes: &'a [u8],
+    /// Number of virtual spaces before the bytes.
     pub before: usize,
+    /// Number of virtual spaces after the bytes.
     pub after: usize,
 }
 
-- 
cgit