diff options
Diffstat (limited to 'src/construct/mod.rs')
-rw-r--r-- | src/construct/mod.rs | 82 |
1 files changed, 69 insertions, 13 deletions
diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 49868e9..da2f5e8 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -1,17 +1,33 @@ //! Constructs found in markdown. //! -//! There are several *things* found when parsing markdown, such as, say, a -//! thematic break. -//! These things are called constructs here. -//! Sometimes, there are several constructs that result in an equivalent thing. -//! For example, [code (fenced)][code_fenced] and -//! [code (indented)][code_indented] are considered different constructs +//! Constructs are grouped by content type. +//! Which content type is allowed somewhere, defines which constructs are +//! allowed there. +//! +//! ## Content type +//! +//! The following content types are found in markdown: +//! +//! * [document][] +//! * [flow][] +//! * [string][] +//! * [text][] //! //! Content types also have a *rest* thing: after all things are parsed, //! there’s something left. +//! In document, that is [flow][]. //! In flow, that is a [paragraph][]. //! In string and text, that is [data][partial_data]. //! +//! ## Construct +//! +//! There are several *things* found when parsing markdown, such as, say, a +//! thematic break. +//! These things are called constructs here. +//! Sometimes, there are several constructs that result in an equivalent thing. +//! For example, [code (fenced)][code_fenced] and +//! [code (indented)][code_indented] are considered different constructs. +//! //! The following constructs are found in markdown: //! //! * [attention (strong, emphasis)][attention] @@ -39,7 +55,7 @@ //! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by //! > [whitespace][partial_whitespace]. //! -//! There are also several routines used in different places: +//! There are also several small subroutines typically used in different places: //! //! * [bom][partial_bom] //! * [data][partial_data] @@ -51,20 +67,60 @@ //! * [title][partial_title] //! 
* [whitespace][partial_whitespace] //! +//! ## Grammar +//! //! Each construct maintained here is explained with a BNF diagram. +//! +//! Such diagrams are considered to be *non-normative*. +//! That is to say, they form illustrative, imperfect, but useful, examples. +//! The code, in Rust, is considered to be normative. +//! //! For example, the docs for [character escape][character_escape] contain: //! //! ```bnf //! character_escape ::= '\\' ascii_punctuation //! ``` //! -//! Such diagrams are considered to be *non-normative*. -//! That is to say, they form illustrative, imperfect, but useful, examples. -//! The code, in Rust, is considered to be normative. +//! These diagrams contain references to character groups as defined by Rust on +//! for example [char][], but also often on [u8][], which is what `micromark-rs` +//! typically works on. +//! So, for example, `ascii_punctuation` refers to +//! [`u8::is_ascii_punctuation`][u8::is_ascii_punctuation]. //! -//! They also contain references to character as defined by [char][], so for -//! example `ascii_punctuation` refers to -//! [`char::is_ascii_punctuation`][char::is_ascii_punctuation]. +//! For clarity, the productions used throughout are: +//! +//! ```bnf +//! ; Rust / ASCII groups: +//! ; 'a'..='z' +//! ascii_lowercase ::= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' +//! ; 'A'..='Z' +//! ascii_uppercase ::= 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' +//! ; 'A'..='Z', 'a'..='z' +//! ascii_alphabetic ::= ascii_lowercase | ascii_uppercase +//! ; '0'..='9' +//! ascii_digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' +//! ; '0'..='9'; 'A'..='F', 'a'..='f' +//! ascii_hexdigit ::= ascii_digit | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' +//! 
; '0'..='9'; 'A'..='Z', 'a'..='z' +//! ascii_alphanumeric ::= ascii_digit | ascii_alphabetic +//! ; '!'..='/'; ':'..='@'; '['..='`'; '{'..='~' +//! ascii_punctuation ::= '!' | '"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~' +//! ; 0x00..=0x1F; 0x7F +//! ascii_control ::= 0x00 | 0x01 | 0x02 | 0x03 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0A | 0x0B | 0x0C | 0x0D | 0x0E | 0x0F | 0x10 | 0x11 | 0x12 | 0x13 | 0x14 | 0x15 | 0x16 | 0x17 | 0x18 | 0x19 | 0x1A | 0x1B | 0x1C | 0x1D | 0x1E | 0x1F | 0x7F +//! +//! ; Markdown groups: +//! ; Any byte (u8) +//! byte ::= 0x00..=0xFF +//! space_or_tab ::= '\t' | ' ' +//! eol ::= '\n' | '\r' | '\r\n' +//! line ::= byte - eol +//! text ::= line - space_or_tab +//! space_or_tab_eol ::= 1*space_or_tab | 0*space_or_tab eol 0*space_or_tab +//! +//! ; Unicode groups: +//! unicode_whitespace ::= ? ; See `char::is_whitespace`. +//! unicode_punctuation ::= ? ; See `src/unicode.rs`. +//! ``` pub mod attention; pub mod autolink; |