diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-16 13:04:38 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-16 13:04:38 +0200 |
commit | 93d0b7c6465f4ffe220b3ddada729746b11eb6ce (patch) | |
tree | e7907fb89a6660c5546d2ca128b2d48ec87539ae /src | |
parent | 2379c2749916483be68dbf816a4c56cd59ced958 (diff) | |
download | markdown-rs-93d0b7c6465f4ffe220b3ddada729746b11eb6ce.tar.gz markdown-rs-93d0b7c6465f4ffe220b3ddada729746b11eb6ce.tar.bz2 markdown-rs-93d0b7c6465f4ffe220b3ddada729746b11eb6ce.zip |
Update some last docs, bnf grammars
Diffstat (limited to '')
-rw-r--r-- | src/construct/autolink.rs | 4 | ||||
-rw-r--r-- | src/construct/label_end.rs | 46 | ||||
-rw-r--r-- | src/construct/label_start_image.rs | 14 | ||||
-rw-r--r-- | src/construct/label_start_link.rs | 16 | ||||
-rw-r--r-- | src/construct/list_item.rs | 50 | ||||
-rw-r--r-- | src/construct/mod.rs | 21 | ||||
-rw-r--r-- | src/construct/paragraph.rs | 18 | ||||
-rw-r--r-- | src/construct/partial_bom.rs | 13 | ||||
-rw-r--r-- | src/construct/partial_data.rs | 2 | ||||
-rw-r--r-- | src/construct/partial_destination.rs | 40 | ||||
-rw-r--r-- | src/construct/partial_label.rs | 15 | ||||
-rw-r--r-- | src/construct/partial_non_lazy_continuation.rs | 2 | ||||
-rw-r--r-- | src/construct/partial_space_or_tab.rs | 17 | ||||
-rw-r--r-- | src/construct/partial_space_or_tab_eol.rs | 26 | ||||
-rw-r--r-- | src/construct/partial_title.rs | 9 | ||||
-rw-r--r-- | src/construct/partial_whitespace.rs | 28 | ||||
-rw-r--r-- | src/construct/thematic_break.rs | 16 |
17 files changed, 226 insertions, 111 deletions
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 9890aaf..b25dc32 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -1,8 +1,8 @@ -//! Autolinks occur in the [text][] content type. +//! Autolink occurs in the [text][] content type. //! //! ## Grammar //! -//! Autolinks form with the following BNF +//! Autolink forms with the following BNF //! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 4752639..5e31444 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -1,11 +1,14 @@ -//! Label end is a construct that occurs in the [text][] content type. +//! Label end occurs in the [text][] content type. //! -//! It forms with the following BNF: +//! ## Grammar +//! +//! Label end forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf -//! label_end ::= ']' [ resource | reference_full | reference_collapsed ] +//! label_end ::= ']' [resource | reference_full | reference_collapsed] //! -//! resource ::= '(' [ whitespace ] destination [ whitespace title ] [ whitespace ] ')' +//! resource ::= '(' [space_or_tab_eol] destination [space_or_tab_eol title] [space_or_tab_eol] ')' //! reference_full ::= '[' label ']' //! reference_collapsed ::= '[' ']' //! @@ -14,15 +17,7 @@ //! ``` //! //! See [`destination`][destination], [`label`][label], and [`title`][title] -//! for grammar, notes, and recommendations. -//! -//! Label end does not, on its own, relate to anything in HTML. -//! When matched with a [label start (link)][label_start_link], they together -//! relate to the `<a>` element in HTML. -//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. -//! It can also match with [label start (image)][label_start_image], in which -//! case they form an `<img>` element. -//! 
See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info. +//! for grammar, notes, and recommendations on each part. //! //! In the case of a resource, the destination and title are given directly //! with the label end. @@ -49,7 +44,7 @@ //! //! When the resource or reference matches, the destination forms the `href` //! attribute in case of a [label start (link)][label_start_link], and an -//! `src` attribute otherwise. +//! `src` attribute in case of a [label start (image)][label_start_image]. //! The title is formed, optionally, on either `<a>` or `<img>`. //! //! For info on how to encode characters in URLs, see @@ -94,12 +89,29 @@ //! <p>a [b <a href="#">c</a> d](#) e</p> //! ``` //! -//! This limiation is imposed because links in links is invalid according to +//! This limitation is imposed because links in links is invalid according to //! HTML. //! Technically though, in markdown it is still possible to construct them by //! using an [autolink][] in a link. //! You definitely should not do that. //! +//! ## HTML +//! +//! Label end does not, on its own, relate to anything in HTML. +//! When matched with a [label start (link)][label_start_link], they together +//! relate to the `<a>` element in HTML. +//! See [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info. +//! It can also match with [label start (image)][label_start_image], in which +//! case they form an `<img>` element. +//! See [*§ 4.8.3 The `img` element*][html_img] in the HTML spec for more info. +//! +//! ## Recommendation +//! +//! It is recommended to use labels instead of [autolinks][autolink]. +//! Labels allow more characters in URLs, and allow relative URLs and `www.` +//! URLs. +//! They also allow for descriptive text to explain the URL in prose. +//! //! ## Tokens //! //! * [`Data`][Name::Data] @@ -143,8 +155,8 @@ //! [autolink]: crate::construct::autolink //! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri //! 
[normalize_identifier]: crate::util::normalize_identifier::normalize_identifier -//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element -//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element +//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! [html_img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX; use crate::construct::partial_space_or_tab_eol::space_or_tab_eol; diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index ffc1aee..8d35df2 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -1,16 +1,20 @@ -//! Label start (image) is a construct that occurs in the [text][] content -//! type. +//! Label start (image) occurs in the [text][] content type. //! -//! It forms with the following BNF: +//! ## Grammar +//! +//! Label start (image) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf //! label_start_image ::= '!' '[' //! ``` //! +//! ## HTML +//! //! Label start (image) does not, on its own, relate to anything in HTML. //! When matched with a [label end][label_end], they together relate to the //! `<img>` element in HTML. -//! See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info. +//! See [*§ 4.8.3 The `img` element*][html_img] in the HTML spec for more info. //! Without an end, the characters (`![`) are output. //! //! ## Tokens @@ -26,7 +30,7 @@ //! //! [text]: crate::construct::text //! [label_end]: crate::construct::label_end -//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element +//! 
[html_img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::event::Name; use crate::resolve::Name as ResolveName; diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index dad6884..e079b2d 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -1,17 +1,21 @@ -//! Label start (link) is a construct that occurs in the [text][] content -//! type. +//! Label start (link) occurs in the [text][] content type. //! -//! It forms with the following BNF: +//! ## Grammar +//! +//! Label start (link) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf //! label_start_link ::= '[' //! ``` //! +//! ## HTML +//! //! Label start (link) does not, on its own, relate to anything in HTML. //! When matched with a [label end][label_end], they together relate to the //! `<a>` element in HTML. -//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. -//! Without an end, the characters (`[`) are output. +//! See [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info. +//! Without an end, the character (`[`) is output. //! //! ## Tokens //! @@ -25,7 +29,7 @@ //! //! [text]: crate::construct::text //! [label_end]: crate::construct::label_end -//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element use crate::event::Name; use crate::resolve::Name as ResolveName; diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs index 09678dd..3e632fb 100644 --- a/src/construct/list_item.rs +++ b/src/construct/list_item.rs @@ -1,27 +1,41 @@ -//! List item is a construct that occurs in the [document][] content type. +//! List item occurs in the [document][] content type. //! -//! It forms with, roughly, the following BNF: +//! ## Grammar +//! +//! 
List item forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf -//! ; Restriction: there must be `eol | space_or_tab` after the start. +//! ; Restriction: if there is no space after the marker, the start must be followed by an `eol`. //! ; Restriction: if the first line after the marker is not blank and starts with `5( space_or_tab )`, //! ; only the first `space_or_tab` is part of the start. -//! list_item_start ::= '*' | '+' | '-' | 1*9( ascii_decimal ) ( '.' | ')' ) [ 1*4 space_or_tab ] +//! list_item_start ::= '*' | '+' | '-' | 1*9(ascii_decimal) ('.' | ')') [1*4 space_or_tab] +//! //! ; Restriction: blank line allowed, except when this is the first continuation after a blank start. //! ; Restriction: if not blank, the line must be indented, exactly `n` times. -//! list_item_cont ::= [ n( space_or_tab ) ] +//! list_item_cont ::= [n(space_or_tab)] //! ``` //! -//! Further lines that are not prefixed with `list_item_cont` cause the item -//! to be exited, except when those lines are lazy continuation. -//! Like so many things in markdown, list (items) too, are very complex. -//! See [*§ Phase 1: block structure*][commonmark-block] for more on parsing -//! details. +//! Further lines that are not prefixed with `list_item_cont` cause the list +//! item to be exited, except when those lines are lazy continuation. +//! Like so many things in markdown, list items too, are complex. +//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark_block] for +//! more on parsing details. +//! +//! As list item is a container, it takes several bytes from the start of the +//! line, while the rest of the line includes more containers or flow. +//! +//! ## HTML +//! +//! List item relates to the `<li>`, `<ol>`, and `<ul>` elements in HTML. +//! See [*§ 4.4.8 The `li` element*][html_li], +//! [*§ 4.4.5 The `ol` element*][html_ol], and +//! 
[*§ 4.4.7 The `ul` element*][html_ul] in the HTML spec for more info. +//! +//! ## Recommendation //! -//! Lists relates to the `<li>`, `<ol>`, and `<ul>` elements in HTML. -//! See [*§ 4.4.8 The `li` element*][html-li], -//! [*§ 4.4.5 The `ol` element*][html-ol], and -//! [*§ 4.4.7 The `ul` element*][html-ul] in the HTML spec for more info. +//! Use a single space after a marker. +//! Never use lazy continuation. //! //! ## Tokens //! @@ -39,10 +53,10 @@ //! * [*§ 5.3 Lists* in `CommonMark`](https://spec.commonmark.org/0.30/#lists) //! //! [document]: crate::construct::document -//! [html-li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element -//! [html-ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element -//! [html-ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element -//! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure +//! [html_li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element +//! [html_ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element +//! [html_ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element +//! [commonmark_block]: https://spec.commonmark.org/0.30/#phase-1-block-structure use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; use crate::construct::partial_space_or_tab::space_or_tab_min_max; diff --git a/src/construct/mod.rs b/src/construct/mod.rs index da2f5e8..5630143 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -75,12 +75,15 @@ //! That is to say, they form illustrative, imperfect, but useful, examples. //! The code, in Rust, is considered to be normative. //! -//! For example, the docs for [character escape][character_escape] contain: +//! The actual syntax of markdown can be described in Backus–Naur form (BNF) as: //! //! ```bnf -//! character_escape ::= '\\' ascii_punctuation +//! markdown = .* //! ``` //! +//! 
No, that’s [not a typo][bnf]: markdown has no syntax errors; anything +//! thrown at it renders *something*. +//! //! These diagrams contain references to character group as defined by Rust on //! for example [char][], but also often on [u8][], which is what `micromark-rs` //! typically works on. @@ -99,14 +102,14 @@ //! ascii_alphabetic ::= ascii_lowercase | ascii_uppercase //! ; '0'..='9' //! ascii_digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' -//! ; '0'..='9'; 'A'..='F', 'a'..='f' +//! ; '0'..='9', 'A'..='F', 'a'..='f' //! ascii_hexdigit ::= ascii_digit | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' -//! ; '0'..='9'; 'A'..='Z', 'a'..='z' +//! ; '0'..='9', 'A'..='Z', 'a'..='z' //! ascii_alphanumeric ::= ascii_digit | ascii_alphabetic -//! ; '!'..='/'; ':'..='@'; '['..='`'; '{'..='~' +//! ; '!'..='/', ':'..='@', '['..='`', '{'..='~' //! ascii_punctuation ::= '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~' -//! ; 0x00..=0x1F; 0x7F -//! ascii_control ::= 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D | 0x0E | 0x0F | 0x10 | 0x12 | 0x13 | 0x14 | 0x15 | 0x16 | 0x17 | 0x18 | 0x19 | 0x1A | 0x1B | 0x1C | 0x1D | 0x1E | 0x1F | 0x7F +//! ; 0x00..=0x1F, 0x7F +//! ascii_control ::= 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D | 0x0E | 0x0F | 0x10 | 0x11 | 0x12 | 0x13 | 0x14 | 0x15 | 0x16 | 0x17 | 0x18 | 0x19 | 0x1A | 0x1B | 0x1C | 0x1D | 0x1E | 0x1F | 0x7F //! //! ; Markdown groups: //! ; Any byte (u8) @@ -115,12 +118,14 @@ //! eol ::= '\n' | '\r' | '\r\n' //! line ::= byte - eol //! text ::= line - space_or_tab -//! space_or_tab_eol ::= 1*space_or_tab | 0*space_or_tab eol 0*space_or_tab +//! space_or_tab_eol ::= 1*space_or_tab | *space_or_tab eol *space_or_tab //! //! ; Unicode groups: //! 
unicode_whitespace ::= ? ; See `char::is_whitespace`. //! unicode_punctuation ::= ? ; See `src/unicode.rs`. //! ``` +//! +//! [bnf]: http://trevorjim.com/a-specification-for-markdown/ pub mod attention; pub mod autolink; diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 5f89ef9..0ddd0c4 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -1,15 +1,18 @@ -//! Paragraph is a construct that occurs in the [flow] content type. +//! Paragraph occurs in the [flow][] content type. //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Paragraph forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf //! ; Restriction: lines cannot start other flow constructs. //! ; Restriction: lines cannot be blank. -//! paragraph ::= 1*line *( eol 1*line ) +//! paragraph ::= 1*line *(eol 1*line) //! ``` //! -//! Paragraphs in markdown relate to the `<p>` element in HTML. -//! See [*§ 4.4.1 The `p` element* in the HTML spec][html] for more info. +//! As this construct occurs in flow, like all flow constructs, it must be +//! followed by an eol (line ending) or eof (end of file). //! //! Paragraphs can contain line endings and whitespace, but they are not //! allowed to contain blank lines, or to be blank themselves. @@ -17,6 +20,11 @@ //! The paragraph is interpreted as the [text][] content type. //! That means that [autolinks][autolink], [code (text)][code_text], etc are allowed. //! +//! ## HTML +//! +//! Paragraphs in markdown relate to the `<p>` element in HTML. +//! See [*§ 4.4.1 The `p` element* in the HTML spec][html] for more info. +//! //! ## Tokens //! //! * [`Paragraph`][Name::Paragraph] diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs index 1818ef4..3f772e9 100644 --- a/src/construct/partial_bom.rs +++ b/src/construct/partial_bom.rs @@ -1,6 +1,17 @@ //! Byte order mark occurs at the start of the document. //! -//! 
It’s the three bytes 0xEF, 0xBB, and 0xBF. +//! ## Grammar +//! +//! Byte order mark forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! byte_order_mark ::= 0xEF 0xBB 0xBF +//! ``` +//! +//! ## Recommendation +//! +//! Don’t use BOMs. //! //! ## Tokens //! diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index adbfae1..19f5f3b 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -1,4 +1,4 @@ -//! Data occurs in [text][] and [string][]. +//! Data occurs in the [string][] and [text][] content types. //! //! It can include anything (except for line endings) and stops at certain //! characters. diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index b5254d3..99b5c5a 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -1,17 +1,21 @@ //! Destination occurs in [definition][] and [label end][label_end]. //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Destination forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf //! destination ::= destination_enclosed | destination_raw //! -//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>' -//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol -//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ] -//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape ) +//! destination_enclosed ::= '<' *(destination_enclosed_byte | destination_enclosed_escape) '>' +//! destination_enclosed_byte ::= line - '<' - '\\' - '>' +//! destination_enclosed_escape ::= '\\' ['<' | '\\' | '>'] +//! +//! destination_raw ::= 1*(destination_raw_byte | destination_raw_escape) //! ; Restriction: unbalanced `)` characters are not allowed. -//! 
destination_raw_text ::= code - '\\' - ascii_control - space_or_tab - eol -//! destination_raw_escape ::= '\\' [ '(' | ')' | '\\' ] +//! destination_raw_byte ::= text - '\\' - ascii_control +//! destination_raw_escape ::= '\\' ['(' | ')' | '\\'] //! ``` //! //! Balanced parens allowed in raw destinations. @@ -28,18 +32,19 @@ //! The grammar for enclosed destinations (`<x>`) prohibits the use of `<`, //! `>`, and line endings to form URLs. //! The angle brackets can be encoded as a character reference, character -//! escape, or percent encoding: for `<` as `&lt;`, `\<`, or `%3c` and for -//! `>` as `&gt;`, `\>`, or `%3e`. +//! escape, or percent encoding: +//! +//! * `<` as `&lt;`, `\<`, or `%3c` +//! * `>` as `&gt;`, `\>`, or `%3e` //! //! The grammar for raw destinations (`x`) prohibits space (` `) and all -//! [ASCII control][char::is_ascii_control] characters, which thus must be +//! [ASCII control][u8::is_ascii_control] characters, which thus must be //! encoded. -//! Unbalanced arens can be encoded as a character reference, character escape, -//! or percent encoding: for `(` as `&lpar;`, `\(`, or `%28` and for `)` as -//! `&rpar;`, `\)`, or `%29`. +//! Unbalanced parens can be encoded as a character reference, character escape, +//! or percent encoding: //! -//! It is recommended to use the enclosed variant of destinations, as it allows -//! the most characters, including arbitrary parens, in URLs. +//! * `(` as `&lpar;`, `\(`, or `%28` +//! * `)` as `&rpar;`, `\)`, or `%29` //! //! There are several cases where incorrect encoding of URLs would, in other //! languages, result in a parse error. @@ -60,6 +65,11 @@ //! <p><a href="https://a%F0%9F%91%8Db%25">x</a></p> //! ``` //! +//! ## Recommendation +//! +//! It is recommended to use the enclosed variant of destinations, as it allows +//! the most characters, including arbitrary parens, in URLs. +//! //! ## References //! //! 
* [`micromark-factory-destination/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-destination/dev/index.js) diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 00d613a..1c1dcec 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -1,14 +1,17 @@ -//! Label occurs in [definition][] and label end. +//! Label occurs in [definition][] and [label end][label_end]. //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Label forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf //! ; Restriction: maximum `999` codes allowed between brackets. //! ; Restriction: no blank lines. -//! ; Restriction: at least 1 non-space and non-eol code must exist. -//! label ::= '[' *( label_text | label_escape ) ']' -//! label_text ::= code - '[' - '\\' - ']' -//! label_escape ::= '\\' [ '[' | '\\' | ']' ] +//! ; Restriction: at least 1 `text` byte must exist. +//! label ::= '[' *(label_byte | label_escape) ']' +//! label_byte ::= code - '[' - '\\' - ']' +//! label_escape ::= '\\' ['[' | '\\' | ']'] //! ``` //! //! The maximum allowed size of the label, without the brackets, is `999` diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 5eec71f..35641ee 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -3,7 +3,7 @@ //! This is a tiny helper that [flow][] constructs can use to make sure that //! the following line is not lazy. //! For example, [html (flow)][html_flow] and code ([fenced][code_fenced], -//! [indented][code_indented]), stop when next line is lazy. +//! [indented][code_indented]), stop when the next line is lazy. //! //! [flow]: crate::construct::flow //! 
[code_fenced]: crate::construct::code_fenced diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index 43cfd45..198dae2 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -1,4 +1,13 @@ -//! Several helpers to parse whitespace (`space_or_tab`, `space_or_tab_eol`). +//! Space or tab occurs in tons of places. +//! +//! ## Grammar +//! +//! Space or tab forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! space_or_tab ::= 1*('\t' | ' ') +//! ``` //! //! ## References //! @@ -9,16 +18,16 @@ use crate::state::{Name as StateName, State}; use crate::subtokenize::link; use crate::tokenizer::Tokenizer; -/// Options to parse `space_or_tab`. +/// Configuration. #[derive(Debug)] pub struct Options { /// Minimum allowed bytes (inclusive). pub min: usize, /// Maximum allowed bytes (inclusive). pub max: usize, - /// Token type to use for whitespace events. + /// Name to use for events. pub kind: Name, - /// Connect this whitespace to the previous. + /// Connect this event to the previous. pub connect: bool, /// Embedded content type to use. pub content: Option<Content>, diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs index 427cb11..01f440e 100644 --- a/src/construct/partial_space_or_tab_eol.rs +++ b/src/construct/partial_space_or_tab_eol.rs @@ -1,8 +1,23 @@ -//! Several helpers to parse whitespace (`space_or_tab`, `space_or_tab_eol`). +//! Space or tab (eol) occurs in [destination][], [label][], and [title][]. +//! +//! ## Grammar +//! +//! Space or tab (eol) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! space_or_tab_eol ::= 1*space_or_tab | *space_or_tab eol *space_or_tab +//! ``` +//! +//! Importantly, this allows one line ending, but not blank lines. //! //! ## References //! //! 
* [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js) +//! +//! [destination]: crate::construct::partial_destination +//! [label]: crate::construct::partial_label +//! [title]: crate::construct::partial_title use crate::construct::partial_space_or_tab::{ space_or_tab_with_options, Options as SpaceOrTabOptions, @@ -12,7 +27,7 @@ use crate::state::{Name as StateName, State}; use crate::subtokenize::link; use crate::tokenizer::Tokenizer; -/// Options to parse `space_or_tab` and one optional eol, but no blank line. +/// Configuration. #[derive(Debug)] pub struct Options { /// Connect this whitespace to the previous. @@ -21,12 +36,7 @@ pub struct Options { pub content: Option<Content>, } -/// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and -/// optionally `space_or_tab`. -/// -/// ```bnf -/// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' ) -/// ``` +/// `space_or_tab_eol` pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName { space_or_tab_eol_with_options( tokenizer, diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 50d26be..a878be3 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -1,11 +1,16 @@ //! Title occurs in [definition][] and [label end][label_end]. //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Title forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf //! ; Restriction: no blank lines. //! ; Restriction: markers must match (in case of `(` with `)`). -//! title ::= marker [ *( code - '\\' | '\\' [ marker ] ) ] marker +//! title ::= marker *(title_byte | title_escape) marker +//! title_byte ::= code - '\\' - marker +//! title_escape ::= '\\' ['\\' | marker] //! marker ::= '"' | '\'' | '(' //! ``` //! 
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs index 04016cb..faaff73 100644 --- a/src/construct/partial_whitespace.rs +++ b/src/construct/partial_whitespace.rs @@ -1,25 +1,25 @@ //! Trailing whitespace occurs in [string][] and [text][]. //! -//! It occurs around line endings, and, in the case of text content it also -//! occurs at the start or end of the whole. +//! ## Grammar //! -//! They’re formed with the following BNF: +//! Trailing whitespace forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf //! ; Restriction: the start and end here count as an eol in the case of `text`. -//! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab +//! whitespace ::= *space_or_tab eol *space_or_tab //! ``` //! +//! It occurs around line endings and, in the case of text content, it also +//! occurs at the start or end of the whole. +//! //! Normally this whitespace is ignored. //! In the case of text content, whitespace before a line ending that //! consists solely of spaces, at least 2, forms a hard break (trailing). //! -//! The minimum number of the spaces is defined in +//! The minimum number of those spaces is defined in //! [`HARD_BREAK_PREFIX_SIZE_MIN`][hard_break_prefix_size_min]. //! -//! Hard breaks in markdown relate to the HTML element `<br>`. -//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. -//! //! It is also possible to create a hard break with a similar construct: a //! [hard break (escape)][hard_break_escape] is a backslash followed //! by a line ending. @@ -28,6 +28,18 @@ //! “escaped” in other languages. //! Trailing spaces are typically invisible in editors, or even automatically //! removed, making hard break (trailing) hard to use. +//! +//! ## HTML +//! +//! Hard breaks in markdown relate to the HTML element `<br>`. +//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. +//! +//! 
## Recommendation +//! +//! Do not use trailing whitespace. +//! It is never needed when using [hard break (escape)][hard_break_escape] +//! to create hard breaks. +//! //! ## Tokens //! //! * [`HardBreakTrailing`][Name::HardBreakTrailing] diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 9496d98..0a8ebe9 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -1,20 +1,28 @@ -//! Thematic breaks, sometimes called horizontal rules, are a construct that -//! occurs in the [flow][] content type. +//! Thematic break occurs in the [flow][] content type. //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Thematic break forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): //! //! ```bnf //! ; Restriction: all markers must be identical. //! ; Restriction: at least 3 markers must be used. //! thematic_break ::= *space_or_tab 1*(1*marker *space_or_tab) //! -//! space_or_tab ::= ' ' | '\t' //! marker ::= '*' | '-' | '_' //! ``` //! +//! As this construct occurs in flow, like all flow constructs, it must be +//! followed by an eol (line ending) or eof (end of file). +//! +//! ## HTML +//! //! Thematic breaks in markdown typically relate to the HTML element `<hr>`. //! See [*§ 4.4.2 The `hr` element* in the HTML spec][html] for more info. //! +//! ## Recommendation +//! //! It is recommended to use exactly three asterisks without whitespace when //! writing markdown. //! As using more than three markers has no effect other than wasting space, |