diff options
| author | 2022-08-16 13:04:38 +0200 | |
|---|---|---|
| committer | 2022-08-16 13:04:38 +0200 | |
| commit | 93d0b7c6465f4ffe220b3ddada729746b11eb6ce (patch) | |
| tree | e7907fb89a6660c5546d2ca128b2d48ec87539ae /src | |
| parent | 2379c2749916483be68dbf816a4c56cd59ced958 (diff) | |
| download | markdown-rs-93d0b7c6465f4ffe220b3ddada729746b11eb6ce.tar.gz markdown-rs-93d0b7c6465f4ffe220b3ddada729746b11eb6ce.tar.bz2 markdown-rs-93d0b7c6465f4ffe220b3ddada729746b11eb6ce.zip | |
Update some last docs, bnf grammars
Diffstat (limited to '')
| -rw-r--r-- | src/construct/autolink.rs | 4 | ||||
| -rw-r--r-- | src/construct/label_end.rs | 46 | ||||
| -rw-r--r-- | src/construct/label_start_image.rs | 14 | ||||
| -rw-r--r-- | src/construct/label_start_link.rs | 16 | ||||
| -rw-r--r-- | src/construct/list_item.rs | 50 | ||||
| -rw-r--r-- | src/construct/mod.rs | 21 | ||||
| -rw-r--r-- | src/construct/paragraph.rs | 18 | ||||
| -rw-r--r-- | src/construct/partial_bom.rs | 13 | ||||
| -rw-r--r-- | src/construct/partial_data.rs | 2 | ||||
| -rw-r--r-- | src/construct/partial_destination.rs | 40 | ||||
| -rw-r--r-- | src/construct/partial_label.rs | 15 | ||||
| -rw-r--r-- | src/construct/partial_non_lazy_continuation.rs | 2 | ||||
| -rw-r--r-- | src/construct/partial_space_or_tab.rs | 17 | ||||
| -rw-r--r-- | src/construct/partial_space_or_tab_eol.rs | 26 | ||||
| -rw-r--r-- | src/construct/partial_title.rs | 9 | ||||
| -rw-r--r-- | src/construct/partial_whitespace.rs | 28 | ||||
| -rw-r--r-- | src/construct/thematic_break.rs | 16 | 
17 files changed, 226 insertions, 111 deletions
| diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 9890aaf..b25dc32 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -1,8 +1,8 @@ -//! Autolinks occur in the [text][] content type. +//! Autolink occurs in the [text][] content type.  //!  //! ## Grammar  //! -//! Autolinks form with the following BNF +//! Autolink forms with the following BNF  //! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 4752639..5e31444 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -1,11 +1,14 @@ -//! Label end is a construct that occurs in the [text][] content type. +//! Label end occurs in the [text][] content type.  //! -//! It forms with the following BNF: +//! ## Grammar +//! +//! Label end forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf -//! label_end ::= ']' [ resource | reference_full | reference_collapsed ] +//! label_end ::= ']' [resource | reference_full | reference_collapsed]  //! -//! resource ::= '(' [ whitespace ] destination [ whitespace title ] [ whitespace ] ')' +//! resource ::= '(' [space_or_tab_eol] destination [space_or_tab_eol title] [space_or_tab_eol] ')'  //! reference_full ::= '[' label ']'  //! reference_collapsed ::= '[' ']'  //! @@ -14,15 +17,7 @@  //! ```  //!  //! See [`destination`][destination], [`label`][label], and [`title`][title] -//! for grammar, notes, and recommendations. -//! -//! Label end does not, on its own, relate to anything in HTML. -//! When matched with a [label start (link)][label_start_link], they together -//! relate to the `<a>` element in HTML. -//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. -//! It can also match with [label start (image)][label_start_image], in which -//! case they form an `<img>` element. -//! 
See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info. +//! for grammar, notes, and recommendations on each part.  //!  //! In the case of a resource, the destination and title are given directly  //! with the label end. @@ -49,7 +44,7 @@  //!  //! When the resource or reference matches, the destination forms the `href`  //! attribute in case of a [label start (link)][label_start_link], and an -//! `src` attribute otherwise. +//! `src` attribute in case of a [label start (image)][label_start_image].  //! The title is formed, optionally, on either `<a>` or `<img>`.  //!  //! For info on how to encode characters in URLs, see @@ -94,12 +89,29 @@  //! <p>a [b <a href="#">c</a> d](#) e</p>  //! ```  //! -//! This limiation is imposed because links in links is invalid according to +//! This limitation is imposed because links in links is invalid according to  //! HTML.  //! Technically though, in markdown it is still possible to construct them by  //! using an [autolink][] in a link.  //! You definitely should not do that.  //! +//! ## HTML +//! +//! Label end does not, on its own, relate to anything in HTML. +//! When matched with a [label start (link)][label_start_link], they together +//! relate to the `<a>` element in HTML. +//! See [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info. +//! It can also match with [label start (image)][label_start_image], in which +//! case they form an `<img>` element. +//! See [*§ 4.8.3 The `img` element*][html_img] in the HTML spec for more info. +//! +//! ## Recommendation +//! +//! It is recommended to use labels instead of [autolinks][autolink]. +//! Labels allow more characters in URLs, and allow relative URLs and `www.` +//! URLs. +//! They also allow for descriptive text to explain the URL in prose. +//!  //! ## Tokens  //!  //! *   [`Data`][Name::Data] @@ -143,8 +155,8 @@  //! [autolink]: crate::construct::autolink  //! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri  //! 
[normalize_identifier]: crate::util::normalize_identifier::normalize_identifier -//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element -//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element +//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! [html_img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element  use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;  use crate::construct::partial_space_or_tab_eol::space_or_tab_eol; diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index ffc1aee..8d35df2 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -1,16 +1,20 @@ -//! Label start (image) is a construct that occurs in the [text][] content -//! type. +//! Label start (image) occurs in the [text][] content type.  //! -//! It forms with the following BNF: +//! ## Grammar +//! +//! Label start (image) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf  //! label_start_image ::= '!' '['  //! ```  //! +//! ## HTML +//!  //! Label start (image) does not, on its own, relate to anything in HTML.  //! When matched with a [label end][label_end], they together relate to the  //! `<img>` element in HTML. -//! See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info. +//! See [*§ 4.8.3 The `img` element*][html_img] in the HTML spec for more info.  //! Without an end, the characters (`![`) are output.  //!  //! ## Tokens @@ -26,7 +30,7 @@  //!  //! [text]: crate::construct::text  //! [label_end]: crate::construct::label_end -//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element +//! 
[html_img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element  use crate::event::Name;  use crate::resolve::Name as ResolveName; diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index dad6884..e079b2d 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -1,17 +1,21 @@ -//! Label start (link) is a construct that occurs in the [text][] content -//! type. +//! Label start (link) occurs in the [text][] content type.  //! -//! It forms with the following BNF: +//! ## Grammar +//! +//! Label start (link) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf  //! label_start_link ::= '['  //! ```  //! +//! ## HTML +//!  //! Label start (link) does not, on its own, relate to anything in HTML.  //! When matched with a [label end][label_end], they together relate to the  //! `<a>` element in HTML. -//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. -//! Without an end, the characters (`[`) are output. +//! See [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info. +//! Without an end, the character (`[`) is output.  //!  //! ## Tokens  //! @@ -25,7 +29,7 @@  //!  //! [text]: crate::construct::text  //! [label_end]: crate::construct::label_end -//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element  use crate::event::Name;  use crate::resolve::Name as ResolveName; diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs index 09678dd..3e632fb 100644 --- a/src/construct/list_item.rs +++ b/src/construct/list_item.rs @@ -1,27 +1,41 @@ -//! List item is a construct that occurs in the [document][] content type. +//! List item occurs in the [document][] content type.  //! -//! It forms with, roughly, the following BNF: +//! 
## Grammar +//! +//! List item forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf -//! ; Restriction: there must be `eol | space_or_tab` after the start. +//! ; Restriction: if there is no space after the marker, the start must be followed by an `eol`.  //! ; Restriction: if the first line after the marker is not blank and starts with `5( space_or_tab )`,  //! ; only the first `space_or_tab` is part of the start. -//! list_item_start ::= '*' | '+' | '-' | 1*9( ascii_decimal ) ( '.' | ')' ) [ 1*4 space_or_tab ] +//! list_item_start ::= '*' | '+' | '-' | 1*9(ascii_decimal) ('.' | ')') [1*4 space_or_tab] +//!  //! ; Restriction: blank line allowed, except when this is the first continuation after a blank start.  //! ; Restriction: if not blank, the line must be indented, exactly `n` times. -//! list_item_cont ::= [ n( space_or_tab ) ] +//! list_item_cont ::= [n(space_or_tab)]  //! ```  //! -//! Further lines that are not prefixed with `list_item_cont` cause the item -//! to be exited, except when those lines are lazy continuation. -//! Like so many things in markdown, list (items) too, are very complex. -//! See [*§ Phase 1: block structure*][commonmark-block] for more on parsing -//! details. +//! Further lines that are not prefixed with `list_item_cont` cause the list +//! item to be exited, except when those lines are lazy continuation. +//! Like so many things in markdown, list items too, are complex. +//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark_block] for +//! more on parsing details. +//! +//! As list item is a container, it takes several bytes from the start of the +//! line, while the rest of the line includes more containers or flow. +//! +//! ## HTML +//! +//! List item relates to the `<li>`, `<ol>`, and `<ul>` elements in HTML. +//! See [*§ 4.4.8 The `li` element*][html_li], +//! [*§ 4.4.5 The `ol` element*][html_ol], and +//! 
[*§ 4.4.7 The `ul` element*][html_ul] in the HTML spec for more info. +//! +//! ## Recommendation  //! -//! Lists relates to the `<li>`, `<ol>`, and `<ul>` elements in HTML. -//! See [*§ 4.4.8 The `li` element*][html-li], -//! [*§ 4.4.5 The `ol` element*][html-ol], and -//! [*§ 4.4.7 The `ul` element*][html-ul] in the HTML spec for more info. +//! Use a single space after a marker. +//! Never use lazy continuation.  //!  //! ## Tokens  //! @@ -39,10 +53,10 @@  //! *   [*§ 5.3 Lists* in `CommonMark`](https://spec.commonmark.org/0.30/#lists)  //!  //! [document]: crate::construct::document -//! [html-li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element -//! [html-ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element -//! [html-ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element -//! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure +//! [html_li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element +//! [html_ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element +//! [html_ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element +//! [commonmark_block]: https://spec.commonmark.org/0.30/#phase-1-block-structure  use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE};  use crate::construct::partial_space_or_tab::space_or_tab_min_max; diff --git a/src/construct/mod.rs b/src/construct/mod.rs index da2f5e8..5630143 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -75,12 +75,15 @@  //! That is to say, they form illustrative, imperfect, but useful, examples.  //! The code, in Rust, is considered to be normative.  //! -//! For example, the docs for [character escape][character_escape] contain: +//! The actual syntax of markdown can be described in Backus–Naur form (BNF) as:  //!  //! ```bnf -//! character_escape ::= '\\' ascii_punctuation +//! markdown = .*  //! ```  //! 
+//! No, that’s [not a typo][bnf]: markdown has no syntax errors; anything +//! thrown at it renders *something*. +//!  //! These diagrams contain references to character group as defined by Rust on  //! for example [char][], but also often on [u8][], which is what `micromark-rs`  //! typically works on. @@ -99,14 +102,14 @@  //! ascii_alphabetic ::= ascii_lowercase | ascii_uppercase  //! ; '0'..='9'  //! ascii_digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' -//! ; '0'..='9'; 'A'..='F', 'a'..='f' +//! ; '0'..='9', 'A'..='F', 'a'..='f'  //! ascii_hexdigit ::= ascii_digit | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' -//! ; '0'..='9'; 'A'..='Z', 'a'..='z' +//! ; '0'..='9', 'A'..='Z', 'a'..='z'  //! ascii_alphanumeric ::= ascii_digit | ascii_alphabetic -//! ; '!'..='/'; ':'..='@'; '['..='`'; '{'..='~' +//! ; '!'..='/', ':'..='@', '['..='`', '{'..='~'  //! ascii_punctuation ::= '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~' -//! ; 0x00..=0x1F; 0x7F -//! ascii_control ::= 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D | 0x0E | 0x0F | 0x10 | 0x12 | 0x13 | 0x14 | 0x15 | 0x16 | 0x17 | 0x18 | 0x19 | 0x1A | 0x1B | 0x1C | 0x1D | 0x1E | 0x1F | 0x7F +//! ; 0x00..=0x1F, 0x7F +//! ascii_control ::= 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D | 0x0E | 0x0F | 0x10 | 0x11 | 0x12 | 0x13 | 0x14 | 0x15 | 0x16 | 0x17 | 0x18 | 0x19 | 0x1A | 0x1B | 0x1C | 0x1D | 0x1E | 0x1F | 0x7F  //!  //! ; Markdown groups:  //! ; Any byte (u8) @@ -115,12 +118,14 @@  //! eol ::= '\n' | '\r' | '\r\n'  //! line ::= byte - eol  //! text ::= line - space_or_tab -//! space_or_tab_eol ::= 1*space_or_tab | 0*space_or_tab eol 0*space_or_tab +//! space_or_tab_eol ::= 1*space_or_tab | *space_or_tab eol *space_or_tab  //!  //! 
; Unicode groups:  //! unicode_whitespace ::= ? ; See `char::is_whitespace`.  //! unicode_punctuation ::= ? ; See `src/unicode.rs`.  //! ``` +//! +//! [bnf]: http://trevorjim.com/a-specification-for-markdown/  pub mod attention;  pub mod autolink; diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 5f89ef9..0ddd0c4 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -1,15 +1,18 @@ -//! Paragraph is a construct that occurs in the [flow] content type. +//! Paragraph occurs in the [flow][] content type.  //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Paragraph forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf  //! ; Restriction: lines cannot start other flow constructs.  //! ; Restriction: lines cannot be blank. -//! paragraph ::= 1*line *( eol 1*line ) +//! paragraph ::= 1*line *(eol 1*line)  //! ```  //! -//! Paragraphs in markdown relate to the `<p>` element in HTML. -//! See [*§ 4.4.1 The `p` element* in the HTML spec][html] for more info. +//! As this construct occurs in flow, like all flow constructs, it must be +//! followed by an eol (line ending) or eof (end of file).  //!  //! Paragraphs can contain line endings and whitespace, but they are not  //! allowed to contain blank lines, or to be blank themselves. @@ -17,6 +20,11 @@  //! The paragraph is interpreted as the [text][] content type.  //! That means that [autolinks][autolink], [code (text)][code_text], etc are allowed.  //! +//! ## HTML +//! +//! Paragraphs in markdown relate to the `<p>` element in HTML. +//! See [*§ 4.4.1 The `p` element* in the HTML spec][html] for more info. +//!  //! ## Tokens  //!  //! *   [`Paragraph`][Name::Paragraph] diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs index 1818ef4..3f772e9 100644 --- a/src/construct/partial_bom.rs +++ b/src/construct/partial_bom.rs @@ -1,6 +1,17 @@  //! 
Byte order mark occurs at the start of the document.  //! -//! It’s the three bytes 0xEF, 0xBB, and 0xBF. +//! ## Grammar +//! +//! Byte order mark forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! byte_order_mark ::= 0xEF 0xBB 0xBF +//! ``` +//! +//! ## Recommendation +//! +//! Don’t use BOMs.  //!  //! ## Tokens  //! diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index adbfae1..19f5f3b 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -1,4 +1,4 @@ -//! Data occurs in [text][] and [string][]. +//! Data occurs in the [string][] and [text][] content types.  //!  //! It can include anything (except for line endings) and stops at certain  //! characters. diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index b5254d3..99b5c5a 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -1,17 +1,21 @@  //! Destination occurs in [definition][] and [label end][label_end].  //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Destination forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf  //! destination ::= destination_enclosed | destination_raw  //! -//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>' -//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol -//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ] -//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape ) +//! destination_enclosed ::= '<' *(destination_enclosed_byte | destination_enclosed_escape) '>' +//! destination_enclosed_byte ::= line - '<' - '\\' - '>' +//! destination_enclosed_escape ::= '\\' ['<' | '\\' | '>'] +//! +//! destination_raw ::= 1*(destination_raw_byte | destination_raw_escape)  //! 
; Restriction: unbalanced `)` characters are not allowed. -//! destination_raw_text ::= code - '\\' - ascii_control - space_or_tab - eol -//! destination_raw_escape ::= '\\' [ '(' | ')' | '\\' ] +//! destination_raw_byte ::= text - '\\' - ascii_control +//! destination_raw_escape ::= '\\' ['(' | ')' | '\\']  //! ```  //!  //! Balanced parens allowed in raw destinations. @@ -28,18 +32,19 @@  //! The grammar for enclosed destinations (`<x>`) prohibits the use of `<`,  //! `>`, and line endings to form URLs.  //! The angle brackets can be encoded as a character reference, character -//! escape, or percent encoding: for `<` as `&lt;`, `\<`, or `%3c` and for -//! `>` as `&gt;`, `\>`, or `%3e`. +//! escape, or percent encoding: +//! +//! *   `<` as `&lt;`, `\<`, or `%3c` +//! *   `>` as `&gt;`, `\>`, or `%3e`  //!  //! The grammar for raw destinations (`x`) prohibits space (` `) and all -//! [ASCII control][char::is_ascii_control] characters, which thus must be +//! [ASCII control][u8::is_ascii_control] characters, which thus must be  //! encoded. -//! Unbalanced arens can be encoded as a character reference, character escape, -//! or percent encoding: for `(` as `&lpar;`, `\(`, or `%28` and for `)` as -//! `&rpar;`, `\)`, or `%29`. +//! Unbalanced parens can be encoded as a character reference, character escape, +//! or percent encoding:  //! -//! It is recommended to use the enclosed variant of destinations, as it allows -//! the most characters, including arbitrary parens, in URLs. +//! *   `(` as `&lpar;`, `\(`, or `%28` +//! *   `)` as `&rpar;`, `\)`, or `%29`  //!  //! There are several cases where incorrect encoding of URLs would, in other  //! languages, result in a parse error. @@ -60,6 +65,11 @@  //! <p><a href="https://a%F0%9F%91%8Db%25">x</a></p>  //! ```  //! +//! ## Recommendation +//! +//! It is recommended to use the enclosed variant of destinations, as it allows +//! the most characters, including arbitrary parens, in URLs. +//!  //! ## References  //!  //! 
*   [`micromark-factory-destination/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-destination/dev/index.js) diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 00d613a..1c1dcec 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -1,14 +1,17 @@ -//! Label occurs in [definition][] and label end. +//! Label occurs in [definition][] and [label end][label_end].  //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Label forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf  //! ; Restriction: maximum `999` codes allowed between brackets.  //! ; Restriction: no blank lines. -//! ; Restriction: at least 1 non-space and non-eol code must exist. -//! label ::= '[' *( label_text | label_escape ) ']' -//! label_text ::= code - '[' - '\\' - ']' -//! label_escape ::= '\\' [ '[' | '\\' | ']' ] +//! ; Restriction: at least 1 `text` byte must exist. +//! label ::= '[' *(label_byte | label_escape) ']' +//! label_byte ::= code - '[' - '\\' - ']' +//! label_escape ::= '\\' ['[' | '\\' | ']']  //! ```  //!  //! The maximum allowed size of the label, without the brackets, is `999` diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 5eec71f..35641ee 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -3,7 +3,7 @@  //! This is a tiny helper that [flow][] constructs can use to make sure that  //! the following line is not lazy.  //! For example, [html (flow)][html_flow] and code ([fenced][code_fenced], -//! [indented][code_indented]), stop when next line is lazy. +//! [indented][code_indented]), stop when the next line is lazy.  //!  //! [flow]: crate::construct::flow  //! 
[code_fenced]: crate::construct::code_fenced diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index 43cfd45..198dae2 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -1,4 +1,13 @@ -//! Several helpers to parse whitespace (`space_or_tab`, `space_or_tab_eol`). +//! Space or tab occurs in tons of places. +//! +//! ## Grammar +//! +//! Space or tab forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! space_or_tab ::= 1*('\t' | ' ') +//! ```  //!  //! ## References  //! @@ -9,16 +18,16 @@ use crate::state::{Name as StateName, State};  use crate::subtokenize::link;  use crate::tokenizer::Tokenizer; -/// Options to parse `space_or_tab`. +/// Configuration.  #[derive(Debug)]  pub struct Options {      /// Minimum allowed bytes (inclusive).      pub min: usize,      /// Maximum allowed bytes (inclusive).      pub max: usize, -    /// Token type to use for whitespace events. +    /// Name to use for events.      pub kind: Name, -    /// Connect this whitespace to the previous. +    /// Connect this event to the previous.      pub connect: bool,      /// Embedded content type to use.      pub content: Option<Content>, diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs index 427cb11..01f440e 100644 --- a/src/construct/partial_space_or_tab_eol.rs +++ b/src/construct/partial_space_or_tab_eol.rs @@ -1,8 +1,23 @@ -//! Several helpers to parse whitespace (`space_or_tab`, `space_or_tab_eol`). +//! Space or tab (eol) occurs in [destination][], [label][], and [title][]. +//! +//! ## Grammar +//! +//! Space or tab (eol) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! space_or_tab_eol ::= 1*space_or_tab | *space_or_tab eol *space_or_tab +//! ``` +//! +//! 
Importantly, this allows one line ending, but not blank lines.  //!  //! ## References  //!  //! *   [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js) +//! +//! [destination]: crate::construct::partial_destination +//! [label]: crate::construct::partial_label +//! [title]: crate::construct::partial_title  use crate::construct::partial_space_or_tab::{      space_or_tab_with_options, Options as SpaceOrTabOptions, @@ -12,7 +27,7 @@ use crate::state::{Name as StateName, State};  use crate::subtokenize::link;  use crate::tokenizer::Tokenizer; -/// Options to parse `space_or_tab` and one optional eol, but no blank line. +/// Configuration.  #[derive(Debug)]  pub struct Options {      /// Connect this whitespace to the previous. @@ -21,12 +36,7 @@ pub struct Options {      pub content: Option<Content>,  } -/// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and -/// optionally `space_or_tab`. -/// -/// ```bnf -/// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' ) -/// ``` +/// `space_or_tab_eol`  pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName {      space_or_tab_eol_with_options(          tokenizer, diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 50d26be..a878be3 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -1,11 +1,16 @@  //! Title occurs in [definition][] and [label end][label_end].  //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Title forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf  //! ; Restriction: no blank lines.  //! ; Restriction: markers must match (in case of `(` with `)`). -//! title ::= marker [  *( code - '\\' | '\\' [ marker ] ) ] marker +//! title ::= marker *(title_byte | title_escape) marker +//! title_byte ::= code - '\\' - marker +//! 
title_escape ::= '\\' ['\\' | marker]  //! marker ::= '"' | '\'' | '('  //! ```  //! diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs index 04016cb..faaff73 100644 --- a/src/construct/partial_whitespace.rs +++ b/src/construct/partial_whitespace.rs @@ -1,25 +1,25 @@  //! Trailing whitespace occurs in [string][] and [text][].  //! -//! It occurs around line endings, and, in the case of text content it also -//! occurs at the start or end of the whole. +//! ## Grammar  //! -//! They’re formed with the following BNF: +//! Trailing whitespace forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf  //! ; Restriction: the start and end here count as an eol in the case of `text`. -//! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab +//! whitespace ::= *space_or_tab eol *space_or_tab  //! ```  //! +//! It occurs around line endings and, in the case of text content, it also +//! occurs at the start or end of the whole. +//!  //! Normally this whitespace is ignored.  //! In the case of text content, whitespace before a line ending that  //! consistents solely of spaces, at least 2, forms a hard break (trailing).  //! -//! The minimum number of the spaces is defined in +//! The minimum number of those spaces is defined in  //! [`HARD_BREAK_PREFIX_SIZE_MIN`][hard_break_prefix_size_min].  //! -//! Hard breaks in markdown relate to the HTML element `<br>`. -//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. -//!  //! It is also possible to create a hard break with a similar construct: a  //! [hard break (escape)][hard_break_escape] is a backslash followed  //! by a line ending. @@ -28,6 +28,18 @@  //! “escaped” in other languages.  //! Trailing spaces are typically invisible in editors, or even automatically  //! removed, making hard break (trailing) hard to use. +//! +//! ## HTML +//! +//! 
Hard breaks in markdown relate to the HTML element `<br>`. +//! See [*§ 4.5.27 The `br` element* in the HTML spec][html] for more info. +//! +//! ## Recommendation +//! +//! Do not use trailing whitespace. +//! It is never needed when using [hard break (escape)][hard_break_escape] +//! to create hard breaks. +//!  //! ## Tokens  //!  //! *   [`HardBreakTrailing`][Name::HardBreakTrailing] diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 9496d98..0a8ebe9 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -1,20 +1,28 @@ -//! Thematic breaks, sometimes called horizontal rules, are a construct that -//! occurs in the [flow][] content type. +//! Thematic break occurs in the [flow][] content type.  //! -//! They’re formed with the following BNF: +//! ## Grammar +//! +//! Thematic break forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>):  //!  //! ```bnf  //! ; Restriction: all markers must be identical.  //! ; Restriction: at least 3 markers must be used.  //! thematic_break ::= *space_or_tab 1*(1*marker *space_or_tab)  //! -//! space_or_tab ::= ' ' | '\t'  //! marker ::= '*' | '-' | '_'  //! ```  //! +//! As this construct occurs in flow, like all flow constructs, it must be +//! followed by an eol (line ending) or eof (end of file). +//! +//! ## HTML +//!  //! Thematic breaks in markdown typically relate to the HTML element `<hr>`.  //! See [*§ 4.4.2 The `hr` element* in the HTML spec][html] for more info.  //! +//! ## Recommendation +//!  //! It is recommended to use exactly three asterisks without whitespace when  //! writing markdown.  //! As using more than three markers has no effect other than wasting space, | 
