diff options
Diffstat (limited to '')
| -rw-r--r-- | src/construct/autolink.rs | 2 | ||||
| -rw-r--r-- | src/construct/definition.rs | 28 | ||||
| -rw-r--r-- | src/construct/label_end.rs | 153 | ||||
| -rw-r--r-- | src/construct/label_start_image.rs | 7 | ||||
| -rw-r--r-- | src/construct/label_start_link.rs | 7 | ||||
| -rw-r--r-- | src/construct/partial_destination.rs | 5 | 
6 files changed, 164 insertions, 38 deletions
| diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index 33cb3f0..e94066b 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -99,8 +99,6 @@  //! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX  //! [sanitize_uri]: crate::util::sanitize_uri  //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element -//! -//! <!-- To do: add explanation of sanitation. -->  use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 674bd65..6a3aceb 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -5,31 +5,13 @@  //! ```bnf  //! definition ::= label ':' whitespace destination [ whitespace title ] [ space_or_tab ]  //! -//! ; Restriction: maximum `999` codes allowed between brackets. -//! ; Restriction: no blank lines. -//! ; Restriction: at least 1 non-space and non-eol code must exist. -//! label ::= '[' *( label_text | label_escape ) ']' -//! label_text ::= code - '[' - '\\' - ']' -//! label_escape ::= '\\' [ '[' | '\\' | ']' ] -//! -//! destination ::= destination_enclosed | destination_raw -//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>' -//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol -//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ] -//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape ) -//! ; Restriction: unbalanced `)` characters are not allowed. -//! destination_raw_text ::= code - '\\' - ascii_control - space_or_tab - eol -//! destination_raw_escape ::= '\\' [ '(' | ')' | '\\' ] -//! -//! ; Restriction: no blank lines. -//! ; Restriction: markers must match (in case of `(` with `)`). -//! title ::= marker [  *( code - '\\' | '\\' [ marker ] ) ] marker -//! 
marker ::= '"' | '\'' | '(' -//!  //! whitespace ::= eol *whitespace | 1*space_or_tab [ eol *whitespace ]  //! space_or_tab ::= ' ' | '\t'  //! ```  //! +//! See [`destination`][destination], [`title`][title], and [`label`][label] +//! for grammar, notes, and recommendations. +//!  //! Definitions in markdown do not, on their own, relate to anything in HTML.  //! When matched with a link (reference), they together relate to the `<a>`  //! element in HTML. @@ -72,7 +54,7 @@  //! ```  //!  //! For info on how to encode characters in URLs, see -//! [`partial_destination`][destination]. +//! [`destination`][destination].  //! For info on how to characters are encoded as `href` on `<a>` or `src` on  //! `<img>` when compiling, see  //! [`sanitize_uri`][sanitize_uri]. @@ -105,6 +87,8 @@  //! [character_escape]: crate::construct::character_escape  //! [character_reference]: crate::construct::character_reference  //! [destination]: crate::construct::partial_destination +//! [title]: crate::construct::partial_title +//! [label]: crate::construct::partial_label  //! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri  //! [normalize_identifier]: crate::util::normalize_identifier  //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index a814302..05c7635 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -1,4 +1,148 @@ -//! To do +//! Label end is a construct that occurs in the [text][] content type. +//! +//! It forms with the following BNF: +//! +//! ```bnf +//! label_end ::= ']' [ resource | reference_full | reference_collapsed ] +//! +//! resource ::= '(' [ whitespace ] destination [ whitespace title ] [ whitespace ] ')' +//! reference_full ::= '[' label ']' +//! reference_collapsed ::= '[' ']' +//! +//! ; See the `destination`, `title`, and `label` constructs for the BNF of +//! ; those parts. +//! ``` +//! +//! 
See [`destination`][destination], [`title`][title], and [`label`][label] +//! for grammar, notes, and recommendations. +//! +//! Label end does not, on its own, relate to anything in HTML. +//! When matched with a [label start (link)][label_start_link], they together +//! relate to the `<a>` element in HTML. +//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. +//! It can also match with [label start (image)][label_start_image], in which +//! case they form an `<img>` element. +//! See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info. +//! +//! In the case of a resource, the destination and title are given directly +//! with the label end. +//! In the case of a reference, this information is provided by a matched +//! [definition][]. +//! Full references (`[x][y]`) match to definitions through their explicit, +//! second, label (`y`). +//! Collapsed labels (`[x][]`) and shortcut labels (`[x]`) match by +//! interpreting the text provided between the first, implicit, label (`x`). +//! To match, the effective label of the reference must be equal to the label +//! of the definition after normalizing with +//! [`normalize_identifier`][normalize_identifier]. +//! +//! Importantly, while the label of a full reference *can* include [string][] +//! content, and in case of collapsed and shortcut references even [text][] +//! content, that content is not considered when matching. +//! To illustrate, neither label matches the definition: +//! +//! ```markdown +//! [a&b]: https://example.com +//! +//! [x][a&b], [a\&b][] +//! ``` +//! +//! When the resource or reference matches, the destination forms the `href` +//! attribute in case of a [label start (link)][label_start_link], and an +//! `src` attribute otherwise. +//! The title is, optionally, formed, on either `<a>` or `<img>`. +//! +//! For info on how to encode characters in URLs, see +//! [`destination`][destination]. +//! 
For info on how characters are encoded as `href` on `<a>` or `src` on +//! `<img>` when compiling, see +//! [`sanitize_uri`][sanitize_uri]. +//! +//! In case of a matched [label start (link)][label_start_link], the interpreted +//! content between it and the label end, is placed between the opening and +//! closing tags. +//! Otherwise, the text is also interpreted, but used *without* the resulting +//! tags: +//! +//! ```markdown +//! [a *b* c](#) +//! +//!  +//! ``` +//! +//! Yields: +//! +//! ```html +//! <p><a href="#">a <em>b</em> c</a></p> +//! <p><img src="#" alt="a b c" /></p> +//! ``` +//! +//! It is possible to use images in links. +//! It’s, somewhat, possible to have links in images (the text will be used, +//! not the HTML, see above). +//! But it’s not possible to use links in links, and the “deepest” link wins. +//! To illustrate: +//! +//! ```markdown +//! a [b [c](#) d](#) e +//! ``` +//! +//! Yields: +//! +//! ```html +//! <p>a [b <a href="#">c</a> d](#) e</p> +//! ``` +//! +//! This limitation is imposed because links in links is invalid according to +//! HTML. +//! Technically though, it is possible by using an [autolink][] in a link, +//! but you definitely should not do that. +//! +//! ## Tokens +//! +//! *   [`Link`][TokenType::Link] +//! *   [`Image`][TokenType::Image] +//! *   [`Label`][TokenType::Label] +//! *   [`LabelText`][TokenType::LabelText] +//! *   [`LabelEnd`][TokenType::LabelEnd] +//! *   [`LabelMarker`][TokenType::LabelMarker] +//! *   [`Resource`][TokenType::Resource] +//! *   [`ResourceMarker`][TokenType::ResourceMarker] +//! *   [`ResourceDestination`][TokenType::ResourceDestination] +//! *   [`ResourceDestinationLiteral`][TokenType::ResourceDestinationLiteral] +//! *   [`ResourceDestinationLiteralMarker`][TokenType::ResourceDestinationLiteralMarker] +//! *   [`ResourceDestinationRaw`][TokenType::ResourceDestinationRaw] +//! *   [`ResourceDestinationString`][TokenType::ResourceDestinationString] +//! 
*   [`ResourceTitle`][TokenType::ResourceTitle] +//! *   [`ResourceTitleMarker`][TokenType::ResourceTitleMarker] +//! *   [`ResourceTitleString`][TokenType::ResourceTitleString] +//! *   [`Reference`][TokenType::Reference] +//! *   [`ReferenceMarker`][TokenType::ReferenceMarker] +//! *   [`ReferenceString`][TokenType::ReferenceString] +//! *   [`Data`][TokenType::Data] +//! *   [`SpaceOrTab`][TokenType::SpaceOrTab] +//! *   [`LineEnding`][TokenType::LineEnding] +//! +//! ## References +//! +//! *   [`label-end.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-end.js) +//! *   [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions) +//! *   [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links) +//! *   [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images) +//! +//! [string]: crate::content::string +//! [text]: crate::content::text +//! [destination]: crate::construct::partial_destination +//! [title]: crate::construct::partial_title +//! [label]: crate::construct::partial_label +//! [label_start_image]: crate::construct::label_start_image +//! [label_start_link]: crate::construct::label_start_link +//! [definition]: crate::construct::definition +//! [autolink]: crate::construct::autolink +//! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri +//! [normalize_identifier]: crate::util::normalize_identifier +//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element  use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;  use crate::construct::{ @@ -16,14 +160,17 @@ use crate::util::{      span::{serialize, Span},  }; +/// State needed to parse label end.  #[derive(Debug)]  struct Info { -    /// To do. +    /// Index into `label_start_stack` of the corresponding opening.    
  label_start_index: usize, -    /// To do. +    /// The proposed `Media` that this seems to represent.      media: Media,  } +/// Resolve media: turn correct label start (image, link) and label end +/// into links and images, or turn them back into data.  #[allow(clippy::too_many_lines)]  pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {      let mut left: Vec<LabelStart> = tokenizer.label_start_list_loose.drain(..).collect(); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 97231e0..7725334 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -7,11 +7,10 @@  //! label_start_image ::= '!' '['  //! ```  //! -//! Label start (images) relates to the `<img>` element in HTML. +//! Label start (image) does not, on its own, relate to anything in HTML. +//! When matched with a [label end][label_end], they together relate to the +//! `<img>` element in HTML.  //! See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info. -//! -//! Whether it contributes an image depends on whether it is followed by a -//! valid [label end][label_end] or not.  //! Without an end, the characters (`![`) are output.  //!  //! ## Tokens diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index 677fb50..46d7c9c 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -7,11 +7,10 @@  //! label_start_link ::= '['  //! ```  //! -//! Label start (link) relates to the `<a>` element in HTML. +//! Label start (link) does not, on its own, relate to anything in HTML. +//! When matched with a [label end][label_end], they together relate to the +//! `<a>` element in HTML.  //! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. -//! -//! Whether it contributes a link depends on whether it is followed by a -//! valid [label end][label_end] or not.  //! Without an end, the characters (`[`) are output.  //!  //! 
## Tokens diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index 05f5060..71e26df 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -1,4 +1,4 @@ -//! Destination occurs in [definition][] and label end. +//! Destination occurs in [definition][] and [label end][].  //!  //! They’re formed with the following BNF:  //! @@ -68,9 +68,8 @@  //! [string]: crate::content::string  //! [character_escape]: crate::construct::character_escape  //! [character_reference]: crate::construct::character_reference +//! [label_end]: crate::construct::label_end  //! [sanitize_uri]: crate::util::sanitize_uri -//! -//! <!-- To do: link label end. -->  use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer}; | 
