5 files changed, 245 insertions, 42 deletions
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index e540b44..3035a20 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -14,7 +14,7 @@
 //!
 //! destination ::= destination_enclosed | destination_raw
 //! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>'
-//! destination_enclosed_text ::= code - '<' - '\\' - eol
+//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol
 //! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ]
 //! destination_raw ::= 1*( destination_raw_text | destination_raw_escape )
 //! ; Restriction: unbalanced `)` characters are not allowed.
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index fb79f68..a5e95bc 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -15,7 +15,7 @@
 //!
 //! The following constructs are found in markdown:
 //!
-//! *   attention (strong, emphasis) (text)
+//! *   attention (strong, emphasis)
 //! *   [autolink][]
 //! *   [blank line][blank_line]
 //! *   block quote
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 8cf5b77..a2f638b 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -1,7 +1,52 @@
+//! Destination occurs in [definition][] and label end.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! destination ::= destination_enclosed | destination_raw
+//!
+//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>'
+//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol
+//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ]
+//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape )
+//! ; Restriction: unbalanced `)` characters are not allowed.
+//! destination_raw_text ::= code - '\\' - ascii_control - space_or_tab - eol
+//! destination_raw_escape ::= '\\' [ '(' | ')' | '\\' ]
+//! ```
+//!
+//! Balanced parens are allowed in raw destinations.
+//! They are counted with a counter that starts at `0`, and is incremented
+//! every time `(` occurs and decremented every time `)` occurs.
+//! If `)` is found when the counter is `0`, the destination closes immediately
+//! before it.
+//! Escaped parens do not count.
+//!
+//! It is recommended to use the enclosed variant of destinations, as it allows
+//! arbitrary parens, and also allows for whitespace and other characters in
+//! URLs.
+//!
+//! The destination is interpreted as the [string][] content type.
+//! That means that character escapes and character references are allowed.
+//!
+//! ## References
+//!
+//! *   [`micromark-factory-destination/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-destination/dev/index.js)
+//!
+//! [definition]: crate::construct::definition
+//! [string]: crate::content::string
+//!
+//! <!-- To do: link label end. -->
+
 // To do: pass token types in.
 
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
+/// Before a destination.
+///
+/// ```markdown
+/// |<ab>
+/// |ab
+/// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('<') => {
@@ -27,7 +72,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `<`, before an enclosed destination.
+///
+/// ```markdown
+/// <|ab>
+/// ```
 fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     if let Code::Char('>') = code {
         tokenizer.enter(TokenType::DefinitionDestinationLiteralMarker);
@@ -44,7 +93,11 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In an enclosed destination.
+///
+/// ```markdown
+/// <u|rl>
+/// ```
 fn enclosed(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('>') => {
@@ -66,7 +119,11 @@ fn enclosed(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `\`, in an enclosed destination.
+///
+/// ```markdown
+/// <a\|>b>
+/// ```
 fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('<' | '>' | '\\') => {
@@ -77,7 +134,11 @@ fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In a raw destination.
+///
+/// ```markdown
+/// a|b
+/// ```
 // To do: these arms can be improved?
 fn raw(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
     // To do: configurable.
@@ -139,7 +200,11 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `\`, in a raw destination.
+///
+/// ```markdown
+/// a\|)b
+/// ```
 fn raw_escape(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
     match code {
         Code::Char('(' | ')' | '\\') => {
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index c772c56..f7ce8d7 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -1,9 +1,65 @@
+//! Label occurs in [definition][] and label end.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: maximum `999` codes allowed between brackets.
+//! ; Restriction: no blank lines.
+//! ; Restriction: at least 1 non-space and non-eol code must exist.
+//! label ::= '[' *( label_text | label_escape ) ']'
+//! label_text ::= code - '[' - '\\' - ']'
+//! label_escape ::= '\\' [ '[' | '\\' | ']' ]
+//! ```
+//!
+//! The maximum allowed size of the label, without the brackets, is `999`
+//! (inclusive), which is defined in
+//! [`LINK_REFERENCE_SIZE_MAX`][link_reference_size_max].
+//!
+//! Labels can contain line endings and whitespace, but they are not allowed to
+//! contain blank lines, and they must not be blank themselves.
+//!
+//! The label is interpreted as the [string][] content type.
+//! That means that character escapes and character references are allowed.
+//!
+//! > 👉 **Note**: this label relates to, but is not, the initial “label” of
+//! > what is known as a reference in markdown:
+//! >
+//! > | Kind      | Link     | Image     |
+//! > | --------- | -------- | --------- |
+//! > | Shortcut  | `[x]`    | `![x]`    |
+//! > | Collapsed | `[x][]`  | `![x][]`  |
+//! > | Full      | `[x][y]` | `![x][y]` |
+//! >
+//! > The 6 above things are references, in the three kinds they come in, as
+//! > links and images.
+//! > The label that this module focusses on is only the thing that contains
+//! > `y`.
+//! >
+//! > The thing that contains `x` is not a single thing when parsing markdown,
+//! > but instead consists of an opening (label start (image) or label start
+//! > (link)) and a closing (label end), so as to allow further phrasing such
+//! > as code (text) or attention.
+//!
+//! ## References
+//!
+//! *   [`micromark-factory-label/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-label/dev/index.js)
+//!
+//! [definition]: crate::construct::definition
+//! [string]: crate::content::string
+//! [link_reference_size_max]: crate::constant::LINK_REFERENCE_SIZE_MAX
+//!
+//! <!-- To do: link label end, label starts. -->
+
 // To do: pass token types in.
 
 use crate::constant::LINK_REFERENCE_SIZE_MAX;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
-/// To do.
+/// Before a label.
+///
+/// ```markdown
+/// |[a]
+/// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('[') => {
@@ -14,12 +70,16 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.enter(TokenType::DefinitionLabelData);
             (State::Fn(Box::new(|t, c| at_break(t, c, false, 0))), None)
         }
-        // To do: allow?
-        _ => unreachable!("expected `[` at start of label"),
+        _ => (State::Nok, None),
     }
 }
 
-/// To do.
+/// In a label, at a break (for example, before data or at a line ending).
+///
+/// ```markdown
+/// [|a]
+/// [a|]
+/// ```
 fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
     match code {
         Code::None | Code::Char('[') => (State::Nok, None),
@@ -37,6 +97,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> S
             tokenizer.enter(TokenType::LineEnding);
             tokenizer.consume(code);
             tokenizer.exit(TokenType::LineEnding);
+            // To do: limit blank lines.
             (
                 State::Fn(Box::new(move |t, c| at_break(t, c, data, size))),
                 None,
@@ -50,7 +111,11 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> S
     }
 }
 
-/// To do.
+/// In a label, in text.
+///
+/// ```markdown
+/// [a|b]
+/// ```
 fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '[' | ']') => {
@@ -85,7 +150,11 @@ fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> Stat
     }
 }
 
-/// To do.
+/// After `\` in a label.
+///
+/// ```markdown
+/// [a\|[b]
+/// ```
 fn escape(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
     match code {
         Code::Char('[' | '\\' | ']') => {
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 4c7b527..a626c50 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -1,32 +1,70 @@
+//! Title occurs in [definition][] and label end.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: no blank lines.
+//! ; Restriction: markers must match (in case of `(` with `)`).
+//! title ::= marker [  *( code - '\\' | '\\' [ marker ] ) ] marker
+//! marker ::= '"' | '\'' | '('
+//! ```
+//!
+//! Titles can be double quoted (`"a"`), single quoted (`'a'`), or
+//! parenthesized (`(a)`).
+//!
+//! Titles can contain line endings and whitespace, but they are not allowed to
+//! contain blank lines.
+//! They are allowed to be blank themselves.
+//!
+//! The title is interpreted as the [string][] content type.
+//! That means that character escapes and character references are allowed.
+//!
+//! ## References
+//!
+//! *   [`micromark-factory-title/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-title/dev/index.js)
+//!
+//! [definition]: crate::construct::definition
+//! [string]: crate::content::string
+//!
+//! <!-- To do: link label end. -->
+
 // To do: pass token types in.
 
 use crate::construct::partial_whitespace::start as whitespace;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
-/// Type of quote, if we’re in an attribure, in complete (condition 7).
+/// Type of title.
 #[derive(Debug, Clone, PartialEq)]
-enum TitleKind {
-    /// In a parenthesised (`(` and `)`) title.
+enum Kind {
+    /// In a parenthesized (`(` and `)`) title.
     Paren,
     /// In a double quoted (`"`) title.
     Double,
-    /// In a single quoted (`"`) title.
+    /// In a single quoted (`'`) title.
     Single,
 }
 
-fn kind_to_marker(kind: &TitleKind) -> char {
+/// Turn a kind into the marker character that closes such a title.
+fn kind_to_marker(kind: &Kind) -> char {
     match kind {
-        TitleKind::Double => '"',
-        TitleKind::Single => '\'',
-        TitleKind::Paren => ')',
+        Kind::Double => '"',
+        Kind::Single => '\'',
+        Kind::Paren => ')',
     }
 }
 
+/// Before a title.
+///
+/// ```markdown
+/// |"a"
+/// |'a'
+/// |(a)
+/// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     let kind = match code {
-        Code::Char('"') => Some(TitleKind::Double),
-        Code::Char('\'') => Some(TitleKind::Single),
-        Code::Char('(') => Some(TitleKind::Paren),
+        Code::Char('"') => Some(Kind::Double),
+        Code::Char('\'') => Some(Kind::Single),
+        Code::Char('(') => Some(Kind::Paren),
         _ => None,
     };
 
@@ -35,14 +73,22 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         tokenizer.enter(TokenType::DefinitionTitleMarker);
         tokenizer.consume(code);
         tokenizer.exit(TokenType::DefinitionTitleMarker);
-        (State::Fn(Box::new(|t, c| at_first_break(t, c, kind))), None)
+        (State::Fn(Box::new(|t, c| begin(t, c, kind))), None)
     } else {
         (State::Nok, None)
     }
 }
 
-/// To do.
-fn at_first_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// After the opening marker.
+///
+/// This is also used when at the closing marker.
+///
+/// ```markdown
+/// "|a"
+/// '|a'
+/// (|a)
+/// ```
+fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
     match code {
         Code::Char(char) if char == kind_to_marker(&kind) => {
             tokenizer.enter(TokenType::DefinitionTitleMarker);
@@ -58,12 +104,19 @@ fn at_first_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> Sta
     }
 }
 
-/// To do.
-fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// At a break: at the closing marker, at a line ending, or before title text.
+///
+/// ```markdown
+/// "|a"
+/// 'a|'
+/// (a|
+/// b)
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
     match code {
         Code::Char(char) if char == kind_to_marker(&kind) => {
             tokenizer.exit(TokenType::DefinitionTitleString);
-            at_first_break(tokenizer, code, kind)
+            begin(tokenizer, code, kind)
         }
         Code::None => (State::Nok, None),
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -71,7 +124,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnRe
             tokenizer.consume(code);
             tokenizer.exit(TokenType::LineEnding);
             (
-                State::Fn(Box::new(|t, c| at_break_line_start(t, c, kind))),
+                State::Fn(Box::new(|t, c| line_start(t, c, kind))),
                 None,
             )
         }
@@ -83,14 +136,26 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnRe
     }
 }
 
-fn at_break_line_start(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// After a line ending.
+///
+/// ```markdown
+/// "a
+/// |b"
+/// ```
+fn line_start(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
     tokenizer.attempt(
         |t, c| whitespace(t, c, TokenType::Whitespace),
-        |_ok| Box::new(|t, c| at_break_line_begin(t, c, kind)),
+        |_ok| Box::new(|t, c| line_begin(t, c, kind)),
     )(tokenizer, code)
 }
 
-fn at_break_line_begin(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// After a line ending, after optional whitespace.
+///
+/// ```markdown
+/// "a
+/// |b"
+/// ```
+fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
     match code {
         // Blank line not allowed.
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
@@ -98,8 +163,12 @@ fn at_break_line_begin(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -
     }
 }
 
-/// To do.
-fn title(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// In title text.
+///
+/// ```markdown
+/// "a|b"
+/// ```
+fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
     match code {
         Code::Char(char) if char == kind_to_marker(&kind) => {
             tokenizer.exit(TokenType::ChunkString);
@@ -120,17 +189,17 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResul
     }
 }
 
-/// To do.
-fn escape(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// After `\`, in title text.
+///
+/// ```markdown
+/// "a\|"b"
+/// ```
+fn escape(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
     match code {
         Code::Char(char) if char == kind_to_marker(&kind) => {
             tokenizer.consume(code);
             (State::Fn(Box::new(move |t, c| title(t, c, kind))), None)
         }
-        Code::Char('\\') => {
-            tokenizer.consume(code);
-            (State::Fn(Box::new(move |t, c| title(t, c, kind))), None)
-        }
         _ => title(tokenizer, code, kind),
     }
 }