aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--readme.md2
-rw-r--r--src/construct/definition.rs2
-rw-r--r--src/construct/mod.rs2
-rw-r--r--src/construct/partial_destination.rs75
-rw-r--r--src/construct/partial_label.rs81
-rw-r--r--src/construct/partial_title.rs127
6 files changed, 246 insertions, 43 deletions
diff --git a/readme.md b/readme.md
index b1ad782..a00a658 100644
--- a/readme.md
+++ b/readme.md
@@ -66,7 +66,6 @@ cargo doc --document-private-items
### Small things
-- [ ] (1) Add docs to partials
- [ ] (1) Remove all `pub fn`s from constructs, except for start
- [ ] (1) Remove `content` content type, as it is no longer needed
- [ ] (1) Connect `ChunkString` in label, destination, title
@@ -174,6 +173,7 @@ cargo doc --document-private-items
- [x] (1) text in heading
- [x] (1) Setext headings: can they be solved in content, or do they have to be
solved in flow somehow
+- [x] (1) Add docs to partials
### Extensions
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index e540b44..3035a20 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -14,7 +14,7 @@
//!
//! destination ::= destination_enclosed | destination_raw
//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>'
-//! destination_enclosed_text ::= code - '<' - '\\' - eol
+//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol
//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ]
//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape )
//! ; Restriction: unbalanced `)` characters are not allowed.
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index fb79f68..a5e95bc 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -15,7 +15,7 @@
//!
//! The following constructs are found in markdown:
//!
-//! * attention (strong, emphasis) (text)
+//! * attention (strong, emphasis)
//! * [autolink][]
//! * [blank line][blank_line]
//! * block quote
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 8cf5b77..a2f638b 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -1,7 +1,52 @@
+//! Destination occurs in [definition][] and label end.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! destination ::= destination_enclosed | destination_raw
+//!
+//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>'
+//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol
+//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ]
+//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape )
+//! ; Restriction: unbalanced `)` characters are not allowed.
+//! destination_raw_text ::= code - '\\' - ascii_control - space_or_tab - eol
+//! destination_raw_escape ::= '\\' [ '(' | ')' | '\\' ]
+//! ```
+//!
+//! Balanced parens are allowed in raw destinations.
+//! They are counted with a counter that starts at `0`, and is incremented
+//! every time `(` occurs and decremented every time `)` occurs.
+//! If `)` is found when the counter is `0`, the destination closes immediately
+//! before it.
+//! Escaped parens do not count.
+//!
+//! It is recommended to use the enclosed variant of destinations, as it allows
+//! arbitrary parens, and also allows for whitespace and other characters in
+//! URLs.
+//!
+//! The destination is interpreted as the [string][] content type.
+//! That means that character escapes and character references are allowed.
+//!
+//! ## References
+//!
+//! * [`micromark-factory-destination/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-destination/dev/index.js)
+//!
+//! [definition]: crate::construct::definition
+//! [string]: crate::content::string
+//!
+//! <!-- To do: link label end. -->
+
// To do: pass token types in.
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+/// Before a destination.
+///
+/// ```markdown
+/// |<ab>
+/// |ab
+/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('<') => {
@@ -27,7 +72,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// After `<`, before an enclosed destination.
+///
+/// ```markdown
+/// <|ab>
+/// ```
fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
if let Code::Char('>') = code {
tokenizer.enter(TokenType::DefinitionDestinationLiteralMarker);
@@ -44,7 +93,11 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In an enclosed destination.
+///
+/// ```markdown
+/// <u|rl>
+/// ```
fn enclosed(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('>') => {
@@ -66,7 +119,11 @@ fn enclosed(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// After `\`, in an enclosed destination.
+///
+/// ```markdown
+/// <a\|>b>
+/// ```
fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('<' | '>' | '\\') => {
@@ -77,7 +134,11 @@ fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In a raw destination.
+///
+/// ```markdown
+/// a|b
+/// ```
// To do: these arms can be improved?
fn raw(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
// To do: configurable.
@@ -139,7 +200,11 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
}
}
-/// To do.
+/// After `\`, in a raw destination.
+///
+/// ```markdown
+/// a\|)b
+/// ```
fn raw_escape(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
match code {
Code::Char('(' | ')' | '\\') => {
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index c772c56..f7ce8d7 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -1,9 +1,65 @@
+//! Label occurs in [definition][] and label end.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: maximum `999` codes allowed between brackets.
+//! ; Restriction: no blank lines.
+//! ; Restriction: at least 1 non-space and non-eol code must exist.
+//! label ::= '[' *( label_text | label_escape ) ']'
+//! label_text ::= code - '[' - '\\' - ']'
+//! label_escape ::= '\\' [ '[' | '\\' | ']' ]
+//! ```
+//!
+//! The maximum allowed size of the label, without the brackets, is `999`
+//! (inclusive), which is defined in
+//! [`LINK_REFERENCE_SIZE_MAX`][link_reference_size_max].
+//!
+//! Labels can contain line endings and whitespace, but they are not allowed to
+//! contain blank lines, and they must not be blank themselves.
+//!
+//! The label is interpreted as the [string][] content type.
+//! That means that character escapes and character references are allowed.
+//!
+//! > 👉 **Note**: this label relates to, but is not, the initial “label” of
+//! > what is known as a reference in markdown:
+//! >
+//! > | Kind | Link | Image |
+//! > | --------- | -------- | --------- |
+//! > | Shortcut | `[x]` | `![x]` |
+//! > | Collapsed | `[x][]` | `![x][]` |
+//! > | Full | `[x][y]` | `![x][y]` |
+//! >
+//! > The 6 above things are references, in the three kinds they come in, as
+//! > links and images.
+//! > The label that this module focusses on is only the thing that contains
+//! > `y`.
+//! >
+//! > The thing that contains `x` is not a single thing when parsing markdown,
+//! > but instead consists of an opening (label start (image) or label start
+//! > (link)) and a closing (label end), so as to allow further phrasing such
+//! > as code (text) or attention.
+//!
+//! ## References
+//!
+//! * [`micromark-factory-label/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-label/dev/index.js)
+//!
+//! [definition]: crate::construct::definition
+//! [string]: crate::content::string
+//! [link_reference_size_max]: crate::constant::LINK_REFERENCE_SIZE_MAX
+//!
+//! <!-- To do: link label end, label starts. -->
+
// To do: pass token types in.
use crate::constant::LINK_REFERENCE_SIZE_MAX;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-/// To do.
+/// Before a label.
+///
+/// ```markdown
+/// |[a]
+/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('[') => {
@@ -14,12 +70,16 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::DefinitionLabelData);
(State::Fn(Box::new(|t, c| at_break(t, c, false, 0))), None)
}
- // To do: allow?
- _ => unreachable!("expected `[` at start of label"),
+ _ => (State::Nok, None),
}
}
-/// To do.
+/// In a label, at a break: before text, a line ending, or the closing `]`.
+///
+/// ```markdown
+/// [|a]
+/// [a|]
+/// ```
fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
match code {
Code::None | Code::Char('[') => (State::Nok, None),
@@ -37,6 +97,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> S
tokenizer.enter(TokenType::LineEnding);
tokenizer.consume(code);
tokenizer.exit(TokenType::LineEnding);
+ // To do: limit blank lines.
(
State::Fn(Box::new(move |t, c| at_break(t, c, data, size))),
None,
@@ -50,7 +111,11 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> S
}
}
-/// To do.
+/// In a label, in text.
+///
+/// ```markdown
+/// [a|b]
+/// ```
fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '[' | ']') => {
@@ -85,7 +150,11 @@ fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> Stat
}
}
-/// To do.
+/// After `\` in a label.
+///
+/// ```markdown
+/// [a\|[b]
+/// ```
fn escape(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
match code {
Code::Char('[' | '\\' | ']') => {
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 4c7b527..a626c50 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -1,32 +1,70 @@
+//! Title occurs in [definition][] and label end.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: no blank lines.
+//! ; Restriction: markers must match (in case of `(` with `)`).
+//! title ::= marker [ *( code - '\\' | '\\' [ marker ] ) ] marker
+//! marker ::= '"' | '\'' | '('
+//! ```
+//!
+//! Titles can be double quoted (`"a"`), single quoted (`'a'`), or
+//! parenthesized (`(a)`).
+//!
+//! Titles can contain line endings and whitespace, but they are not allowed to
+//! contain blank lines.
+//! They are allowed to be blank themselves.
+//!
+//! The title is interpreted as the [string][] content type.
+//! That means that character escapes and character references are allowed.
+//!
+//! ## References
+//!
+//! * [`micromark-factory-title/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-title/dev/index.js)
+//!
+//! [definition]: crate::construct::definition
+//! [string]: crate::content::string
+//!
+//! <!-- To do: link label end. -->
+
// To do: pass token types in.
use crate::construct::partial_whitespace::start as whitespace;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-/// Type of quote, if we’re in an attribure, in complete (condition 7).
+/// Type of title.
#[derive(Debug, Clone, PartialEq)]
-enum TitleKind {
- /// In a parenthesised (`(` and `)`) title.
+enum Kind {
+ /// In a parenthesized (`(` and `)`) title.
Paren,
/// In a double quoted (`"`) title.
Double,
- /// In a single quoted (`"`) title.
+ /// In a single quoted (`'`) title.
Single,
}
-fn kind_to_marker(kind: &TitleKind) -> char {
+/// Get the closing marker character for a kind of title.
+fn kind_to_marker(kind: &Kind) -> char {
match kind {
- TitleKind::Double => '"',
- TitleKind::Single => '\'',
- TitleKind::Paren => ')',
+ Kind::Double => '"',
+ Kind::Single => '\'',
+ Kind::Paren => ')',
}
}
+/// Before a title.
+///
+/// ```markdown
+/// |"a"
+/// |'a'
+/// |(a)
+/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
let kind = match code {
- Code::Char('"') => Some(TitleKind::Double),
- Code::Char('\'') => Some(TitleKind::Single),
- Code::Char('(') => Some(TitleKind::Paren),
+ Code::Char('"') => Some(Kind::Double),
+ Code::Char('\'') => Some(Kind::Single),
+ Code::Char('(') => Some(Kind::Paren),
_ => None,
};
@@ -35,14 +73,22 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::DefinitionTitleMarker);
tokenizer.consume(code);
tokenizer.exit(TokenType::DefinitionTitleMarker);
- (State::Fn(Box::new(|t, c| at_first_break(t, c, kind))), None)
+ (State::Fn(Box::new(|t, c| begin(t, c, kind))), None)
} else {
(State::Nok, None)
}
}
-/// To do.
-fn at_first_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// After the opening marker.
+///
+/// This is also used when at the closing marker.
+///
+/// ```markdown
+/// "|a"
+/// '|a'
+/// (|a)
+/// ```
+fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
Code::Char(char) if char == kind_to_marker(&kind) => {
tokenizer.enter(TokenType::DefinitionTitleMarker);
@@ -58,12 +104,19 @@ fn at_first_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> Sta
}
}
-/// To do.
-fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// In a title, at a break: before text, a line ending, or the closing marker.
+///
+/// ```markdown
+/// "|a"
+/// 'a|'
+/// (a|
+/// b)
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
Code::Char(char) if char == kind_to_marker(&kind) => {
tokenizer.exit(TokenType::DefinitionTitleString);
- at_first_break(tokenizer, code, kind)
+ begin(tokenizer, code, kind)
}
Code::None => (State::Nok, None),
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -71,7 +124,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnRe
tokenizer.consume(code);
tokenizer.exit(TokenType::LineEnding);
(
- State::Fn(Box::new(|t, c| at_break_line_start(t, c, kind))),
+ State::Fn(Box::new(|t, c| line_start(t, c, kind))),
None,
)
}
@@ -83,14 +136,26 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnRe
}
}
-fn at_break_line_start(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// After a line ending.
+///
+/// ```markdown
+/// "a
+/// |b"
+/// ```
+fn line_start(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
tokenizer.attempt(
|t, c| whitespace(t, c, TokenType::Whitespace),
- |_ok| Box::new(|t, c| at_break_line_begin(t, c, kind)),
+ |_ok| Box::new(|t, c| line_begin(t, c, kind)),
)(tokenizer, code)
}
-fn at_break_line_begin(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// After a line ending, after optional whitespace.
+///
+/// ```markdown
+/// "a
+/// |b"
+/// ```
+fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
// Blank line not allowed.
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
@@ -98,8 +163,12 @@ fn at_break_line_begin(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -
}
}
-/// To do.
-fn title(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// In title text.
+///
+/// ```markdown
+/// "a|b"
+/// ```
+fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
Code::Char(char) if char == kind_to_marker(&kind) => {
tokenizer.exit(TokenType::ChunkString);
@@ -120,17 +189,17 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResul
}
}
-/// To do.
-fn escape(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult {
+/// After `\`, in title text.
+///
+/// ```markdown
+/// "a\|"b"
+/// ```
+fn escape(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
Code::Char(char) if char == kind_to_marker(&kind) => {
tokenizer.consume(code);
(State::Fn(Box::new(move |t, c| title(t, c, kind))), None)
}
- Code::Char('\\') => {
- tokenizer.consume(code);
- (State::Fn(Box::new(move |t, c| title(t, c, kind))), None)
- }
_ => title(tokenizer, code, kind),
}
}