diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-28 16:54:37 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-28 16:54:43 +0200 |
commit | 6e9447b5ec7f1823b7d08ed8f51eed23d3856cce (patch) | |
tree | c313716db1082e6cd1383af66297ce7acd4f1911 | |
parent | e9db93b428bc3da7c1fa3bcf8acf166012cf18c8 (diff) | |
download | markdown-rs-6e9447b5ec7f1823b7d08ed8f51eed23d3856cce.tar.gz markdown-rs-6e9447b5ec7f1823b7d08ed8f51eed23d3856cce.tar.bz2 markdown-rs-6e9447b5ec7f1823b7d08ed8f51eed23d3856cce.zip |
Add docs to label end
-rw-r--r-- | readme.md | 5 | ||||
-rw-r--r-- | src/construct/autolink.rs | 2 | ||||
-rw-r--r-- | src/construct/definition.rs | 28 | ||||
-rw-r--r-- | src/construct/label_end.rs | 153 | ||||
-rw-r--r-- | src/construct/label_start_image.rs | 7 | ||||
-rw-r--r-- | src/construct/label_start_link.rs | 7 | ||||
-rw-r--r-- | src/construct/partial_destination.rs | 5 |
7 files changed, 167 insertions, 40 deletions
@@ -124,7 +124,6 @@ cargo doc --document-private-items #### Docs -- [ ] (1) `label_end` - [ ] (1) `space_or_tab_one_line_ending` - [ ] (1) `ParseState` - [ ] (1) Image, Link, and other media token types; `LabelStart`, `Media` @@ -135,7 +134,7 @@ cargo doc --document-private-items #### Refactor -- [ ] (1) Move map handling from `resolve_media`, reuse in `subtokenize` +- [ ] (1) Use `edit_map` in `subtokenize` - [ ] (1) Clean shifting, assertions in the above helper - [ ] (1) Clean `space_or_tab_one_line_ending` - [ ] (1) Use `link_to` (and `space_or_tab_one_line_ending`) in more places? @@ -284,3 +283,5 @@ important. - [x] (1) Add improved docs in compiler - [x] (1) Add docs for `RESOURCE_DESTINATION_BALANCE_MAX` - [x] (1) Add docs for `label_start_image`, `label_start_link` +- [x] (1) Add docs for `label_end` +- [x] (1) Move map handling from `resolve_media` diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 33cb3f0..e94066b 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -99,8 +99,6 @@ //! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX //! [sanitize_uri]: crate::util::sanitize_uri //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element -//! -//! <!-- To do: add explanation of sanitation. --> use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX}; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 674bd65..6a3aceb 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -5,31 +5,13 @@ //! ```bnf //! definition ::= label ':' whitespace destination [ whitespace title ] [ space_or_tab ] //! -//! ; Restriction: maximum `999` codes allowed between brackets. -//! ; Restriction: no blank lines. -//! ; Restriction: at least 1 non-space and non-eol code must exist. -//! label ::= '[' *( label_text | label_escape ) ']' -//! 
label_text ::= code - '[' - '\\' - ']' -//! label_escape ::= '\\' [ '[' | '\\' | ']' ] -//! -//! destination ::= destination_enclosed | destination_raw -//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>' -//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol -//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ] -//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape ) -//! ; Restriction: unbalanced `)` characters are not allowed. -//! destination_raw_text ::= code - '\\' - ascii_control - space_or_tab - eol -//! destination_raw_escape ::= '\\' [ '(' | ')' | '\\' ] -//! -//! ; Restriction: no blank lines. -//! ; Restriction: markers must match (in case of `(` with `)`). -//! title ::= marker [ *( code - '\\' | '\\' [ marker ] ) ] marker -//! marker ::= '"' | '\'' | '(' -//! //! whitespace ::= eol *whitespace | 1*space_or_tab [ eol *whitespace ] //! space_or_tab ::= ' ' | '\t' //! ``` //! +//! See [`destination`][destination], [`title`][title], and [`label`][label] +//! for grammar, notes, and recommendations. +//! //! Definitions in markdown do not, on their own, relate to anything in HTML. //! When matched with a link (reference), they together relate to the `<a>` //! element in HTML. @@ -72,7 +54,7 @@ //! ``` //! //! For info on how to encode characters in URLs, see -//! [`partial_destination`][destination]. +//! [`destination`][destination]. //! For info on how to characters are encoded as `href` on `<a>` or `src` on //! `<img>` when compiling, see //! [`sanitize_uri`][sanitize_uri]. @@ -105,6 +87,8 @@ //! [character_escape]: crate::construct::character_escape //! [character_reference]: crate::construct::character_reference //! [destination]: crate::construct::partial_destination +//! [title]: crate::construct::partial_title +//! [label]: crate::construct::partial_label //! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri //! 
[normalize_identifier]: crate::util::normalize_identifier //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index a814302..05c7635 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -1,4 +1,148 @@ -//! To do +//! Label end is a construct that occurs in the [text][] content type. +//! +//! It forms with the following BNF: +//! +//! ```bnf +//! label_end ::= ']' [ resource | reference_full | reference_collapsed ] +//! +//! resource ::= '(' [ whitespace ] destination [ whitespace title ] [ whitespace ] ')' +//! reference_full ::= '[' label ']' +//! reference_collapsed ::= '[' ']' +//! +//! ; See the `destination`, `title`, and `label` constructs for the BNF of +//! ; those parts. +//! ``` +//! +//! See [`destination`][destination], [`title`][title], and [`label`][label] +//! for grammar, notes, and recommendations. +//! +//! Label end does not, on its own, relate to anything in HTML. +//! When matched with a [label start (link)][label_start_link], they together +//! relate to the `<a>` element in HTML. +//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. +//! It can also match with [label start (image)][label_start_image], in which +//! case they form an `<img>` element. +//! See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info. +//! +//! In the case of a resource, the destination and title are given directly +//! with the label end. +//! In the case of a reference, this information is provided by a matched +//! [definition][]. +//! Full references (`[x][y]`) match to definitions through their explicit, +//! second, label (`y`). +//! Collapsed labels (`[x][]`) and shortcut labels (`[x]`) match by +//! interpreting the text provided between the first, implicit, label (`x`). +//! To match, the effective label of the reference must be equal to the label +//! 
of the definition after normalizing with +//! [`normalize_identifier`][normalize_identifier]. +//! +//! Importantly, while the label of a full reference *can* include [string][] +//! content, and in case of collapsed and shortcut references even [text][] +//! content, that content is not considered when matching. +//! To illustrate, neither label matches the definition: +//! +//! ```markdown +//! [a&b]: https://example.com +//! +//! [x][a&b], [a\&b][] +//! ``` +//! +//! When the resource or reference matches, the destination forms the `href` +//! attribute in case of a [label start (link)][label_start_link], and an +//! `src` attribute otherwise. +//! The title is, optionally, formed, on either `<a>` or `<img>`. +//! +//! For info on how to encode characters in URLs, see +//! [`destination`][destination]. +//! For info on how characters are encoded as `href` on `<a>` or `src` on +//! `<img>` when compiling, see +//! [`sanitize_uri`][sanitize_uri]. +//! +//! In case of a matched [label start (link)][label_start_link], the interpreted +//! content between it and the label end, is placed between the opening and +//! closing tags. +//! Otherwise, the text is also interpreted, but used *without* the resulting +//! tags: +//! +//! ```markdown +//! [a *b* c](#) +//! +//! ![a *b* c](#) +//! ``` +//! +//! Yields: +//! +//! ```html +//! <p><a href="#">a <em>b</em> c</a></p> +//! <p><img src="#" alt="a b c" /></p> +//! ``` +//! +//! It is possible to use images in links. +//! It’s, somewhat, possible to have links in images (the text will be used, +//! not the HTML, see above). +//! But it’s not possible to use links in links, and the “deepest” link wins. +//! To illustrate: +//! +//! ```markdown +//! a [b [c](#) d](#) e +//! ``` +//! +//! Yields: +//! +//! ```html +//! <p>a [b <a href="#">c</a> d](#) e</p> +//! ``` +//! +//! This limitation is imposed because links in links are invalid according to +//! HTML. +//! 
Technically though, it is possible by using an [autolink][] in a link, +//! but you definitely should not do that. +//! +//! ## Tokens +//! +//! * [`Link`][TokenType::Link] +//! * [`Image`][TokenType::Image] +//! * [`Label`][TokenType::Label] +//! * [`LabelText`][TokenType::LabelText] +//! * [`LabelEnd`][TokenType::LabelEnd] +//! * [`LabelMarker`][TokenType::LabelMarker] +//! * [`Resource`][TokenType::Resource] +//! * [`ResourceMarker`][TokenType::ResourceMarker] +//! * [`ResourceDestination`][TokenType::ResourceDestination] +//! * [`ResourceDestinationLiteral`][TokenType::ResourceDestinationLiteral] +//! * [`ResourceDestinationLiteralMarker`][TokenType::ResourceDestinationLiteralMarker] +//! * [`ResourceDestinationRaw`][TokenType::ResourceDestinationRaw] +//! * [`ResourceDestinationString`][TokenType::ResourceDestinationString] +//! * [`ResourceTitle`][TokenType::ResourceTitle] +//! * [`ResourceTitleMarker`][TokenType::ResourceTitleMarker] +//! * [`ResourceTitleString`][TokenType::ResourceTitleString] +//! * [`Reference`][TokenType::Reference] +//! * [`ReferenceMarker`][TokenType::ReferenceMarker] +//! * [`ReferenceString`][TokenType::ReferenceString] +//! * [`Data`][TokenType::Data] +//! * [`SpaceOrTab`][TokenType::SpaceOrTab] +//! * [`LineEnding`][TokenType::LineEnding] +//! +//! ## References +//! +//! * [`label-end.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-end.js) +//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions) +//! * [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links) +//! * [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images) +//! +//! [string]: crate::content::string +//! [text]: crate::content::text +//! [destination]: crate::construct::partial_destination +//! [title]: crate::construct::partial_title +//! [label]: crate::construct::partial_label +//! 
[label_start_image]: crate::construct::label_start_image +//! [label_start_link]: crate::construct::label_start_link +//! [definition]: crate::construct::definition +//! [autolink]: crate::construct::autolink +//! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri +//! [normalize_identifier]: crate::util::normalize_identifier +//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX; use crate::construct::{ @@ -16,14 +160,17 @@ use crate::util::{ span::{serialize, Span}, }; +/// State needed to parse label end. #[derive(Debug)] struct Info { - /// To do. + /// Index into `label_start_stack` of the corresponding opening. label_start_index: usize, - /// To do. + /// The proposed `Media` that this seems to represent. media: Media, } +/// Resolve media: turn correct label start (image, link) and label end +/// into links and images, or turn them back into data. #[allow(clippy::too_many_lines)] pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> { let mut left: Vec<LabelStart> = tokenizer.label_start_list_loose.drain(..).collect(); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 97231e0..7725334 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -7,11 +7,10 @@ //! label_start_image ::= '!' '[' //! ``` //! -//! Label start (images) relates to the `<img>` element in HTML. +//! Label start (image) does not, on its own, relate to anything in HTML. +//! When matched with a [label end][label_end], they together relate to the +//! `<img>` element in HTML. //! See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info. -//! -//! Whether it contributes an image depends on whether it is followed by a -//! valid [label end][label_end] or not. //! 
Without an end, the characters (`![`) are output. //! //! ## Tokens diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index 677fb50..46d7c9c 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -7,11 +7,10 @@ //! label_start_link ::= '[' //! ``` //! -//! Label start (link) relates to the `<a>` element in HTML. +//! Label start (link) does not, on its own, relate to anything in HTML. +//! When matched with a [label end][label_end], they together relate to the +//! `<a>` element in HTML. //! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. -//! -//! Whether it contributes a link depends on whether it is followed by a -//! valid [label end][label_end] or not. //! Without an end, the characters (`[`) are output. //! //! ## Tokens diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index 05f5060..71e26df 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -1,4 +1,4 @@ -//! Destination occurs in [definition][] and label end. +//! Destination occurs in [definition][] and [label end][label_end]. //! //! They’re formed with the following BNF: //! @@ -68,9 +68,8 @@ //! [string]: crate::content::string //! [character_escape]: crate::construct::character_escape //! [character_reference]: crate::construct::character_reference +//! [label_end]: crate::construct::label_end //! [sanitize_uri]: crate::util::sanitize_uri -//! -//! <!-- To do: link label end. --> use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer}; |