aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-28 16:54:37 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-28 16:54:43 +0200
commit6e9447b5ec7f1823b7d08ed8f51eed23d3856cce (patch)
treec313716db1082e6cd1383af66297ce7acd4f1911
parente9db93b428bc3da7c1fa3bcf8acf166012cf18c8 (diff)
downloadmarkdown-rs-6e9447b5ec7f1823b7d08ed8f51eed23d3856cce.tar.gz
markdown-rs-6e9447b5ec7f1823b7d08ed8f51eed23d3856cce.tar.bz2
markdown-rs-6e9447b5ec7f1823b7d08ed8f51eed23d3856cce.zip
Add docs to label end
Diffstat (limited to '')
-rw-r--r--readme.md5
-rw-r--r--src/construct/autolink.rs2
-rw-r--r--src/construct/definition.rs28
-rw-r--r--src/construct/label_end.rs153
-rw-r--r--src/construct/label_start_image.rs7
-rw-r--r--src/construct/label_start_link.rs7
-rw-r--r--src/construct/partial_destination.rs5
7 files changed, 167 insertions, 40 deletions
diff --git a/readme.md b/readme.md
index 1250ab6..4f7d4a0 100644
--- a/readme.md
+++ b/readme.md
@@ -124,7 +124,6 @@ cargo doc --document-private-items
#### Docs
-- [ ] (1) `label_end`
- [ ] (1) `space_or_tab_one_line_ending`
- [ ] (1) `ParseState`
- [ ] (1) Image, Link, and other media token types; `LabelStart`, `Media`
@@ -135,7 +134,7 @@ cargo doc --document-private-items
#### Refactor
-- [ ] (1) Move map handling from `resolve_media`, reuse in `subtokenize`
+- [ ] (1) Use `edit_map` in `subtokenize`
- [ ] (1) Clean shifting, assertions in the above helper
- [ ] (1) Clean `space_or_tab_one_line_ending`
- [ ] (1) Use `link_to` (and `space_or_tab_one_line_ending`) in more places?
@@ -284,3 +283,5 @@ important.
- [x] (1) Add improved docs in compiler
- [x] (1) Add docs for `RESOURCE_DESTINATION_BALANCE_MAX`
- [x] (1) Add docs for `label_start_image`, `label_start_link`
+- [x] (1) Add docs for `label_end`
+- [x] (1) Move map handling from `resolve_media`
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 33cb3f0..e94066b 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -99,8 +99,6 @@
//! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX
//! [sanitize_uri]: crate::util::sanitize_uri
//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
-//!
-//! <!-- To do: add explanation of sanitation. -->
use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 674bd65..6a3aceb 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -5,31 +5,13 @@
//! ```bnf
//! definition ::= label ':' whitespace destination [ whitespace title ] [ space_or_tab ]
//!
-//! ; Restriction: maximum `999` codes allowed between brackets.
-//! ; Restriction: no blank lines.
-//! ; Restriction: at least 1 non-space and non-eol code must exist.
-//! label ::= '[' *( label_text | label_escape ) ']'
-//! label_text ::= code - '[' - '\\' - ']'
-//! label_escape ::= '\\' [ '[' | '\\' | ']' ]
-//!
-//! destination ::= destination_enclosed | destination_raw
-//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>'
-//! destination_enclosed_text ::= code - '<' - '\\' - '>' - eol
-//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ]
-//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape )
-//! ; Restriction: unbalanced `)` characters are not allowed.
-//! destination_raw_text ::= code - '\\' - ascii_control - space_or_tab - eol
-//! destination_raw_escape ::= '\\' [ '(' | ')' | '\\' ]
-//!
-//! ; Restriction: no blank lines.
-//! ; Restriction: markers must match (in case of `(` with `)`).
-//! title ::= marker [ *( code - '\\' | '\\' [ marker ] ) ] marker
-//! marker ::= '"' | '\'' | '('
-//!
//! whitespace ::= eol *whitespace | 1*space_or_tab [ eol *whitespace ]
//! space_or_tab ::= ' ' | '\t'
//! ```
//!
+//! See [`destination`][destination], [`title`][title], and [`label`][label]
+//! for grammar, notes, and recommendations.
+//!
//! Definitions in markdown do not, on their own, relate to anything in HTML.
//! When matched with a link (reference), they together relate to the `<a>`
//! element in HTML.
@@ -72,7 +54,7 @@
//! ```
//!
//! For info on how to encode characters in URLs, see
-//! [`partial_destination`][destination].
+//! [`destination`][destination].
//! For info on how characters are encoded as `href` on `<a>` or `src` on
//! `<img>` when compiling, see
//! [`sanitize_uri`][sanitize_uri].
@@ -105,6 +87,8 @@
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [destination]: crate::construct::partial_destination
+//! [title]: crate::construct::partial_title
+//! [label]: crate::construct::partial_label
//! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri
//! [normalize_identifier]: crate::util::normalize_identifier
//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index a814302..05c7635 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -1,4 +1,148 @@
-//! To do
+//! Label end is a construct that occurs in the [text][] content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! label_end ::= ']' [ resource | reference_full | reference_collapsed ]
+//!
+//! resource ::= '(' [ whitespace ] destination [ whitespace title ] [ whitespace ] ')'
+//! reference_full ::= '[' label ']'
+//! reference_collapsed ::= '[' ']'
+//!
+//! ; See the `destination`, `title`, and `label` constructs for the BNF of
+//! ; those parts.
+//! ```
+//!
+//! See [`destination`][destination], [`title`][title], and [`label`][label]
+//! for grammar, notes, and recommendations.
+//!
+//! Label end does not, on its own, relate to anything in HTML.
+//! When matched with a [label start (link)][label_start_link], they together
+//! relate to the `<a>` element in HTML.
+//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info.
+//! It can also match with [label start (image)][label_start_image], in which
+//! case they form an `<img>` element.
+//! See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info.
+//!
+//! In the case of a resource, the destination and title are given directly
+//! with the label end.
+//! In the case of a reference, this information is provided by a matched
+//! [definition][].
+//! Full references (`[x][y]`) match to definitions through their explicit,
+//! second, label (`y`).
+//! Collapsed references (`[x][]`) and shortcut references (`[x]`) match by
+//! interpreting the text provided in the first, implicit, label (`x`).
+//! To match, the effective label of the reference must be equal to the label
+//! of the definition after normalizing with
+//! [`normalize_identifier`][normalize_identifier].
+//!
+//! Importantly, while the label of a full reference *can* include [string][]
+//! content, and in case of collapsed and shortcut references even [text][]
+//! content, that content is not considered when matching.
+//! To illustrate, neither label matches the definition:
+//!
+//! ```markdown
+//! [a&b]: https://example.com
+//!
+//! [x][a&amp;b], [a\&b][]
+//! ```
+//!
+//! When the resource or reference matches, the destination forms the `href`
+//! attribute in case of a [label start (link)][label_start_link], and an
+//! `src` attribute otherwise.
+//! The title is, optionally, formed, on either `<a>` or `<img>`.
+//!
+//! For info on how to encode characters in URLs, see
+//! [`destination`][destination].
+//! For info on how characters are encoded as `href` on `<a>` or `src` on
+//! `<img>` when compiling, see
+//! [`sanitize_uri`][sanitize_uri].
+//!
+//! In case of a matched [label start (link)][label_start_link], the interpreted
+//! content between it and the label end is placed between the opening and
+//! closing tags.
+//! Otherwise, the text is also interpreted, but used *without* the resulting
+//! tags:
+//!
+//! ```markdown
+//! [a *b* c](#)
+//!
+//! ![a *b* c](#)
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p><a href="#">a <em>b</em> c</a></p>
+//! <p><img src="#" alt="a b c" /></p>
+//! ```
+//!
+//! It is possible to use images in links.
+//! It’s, somewhat, possible to have links in images (the text will be used,
+//! not the HTML, see above).
+//! But it’s not possible to use links in links, and the “deepest” link wins.
+//! To illustrate:
+//!
+//! ```markdown
+//! a [b [c](#) d](#) e
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p>a [b <a href="#">c</a> d](#) e</p>
+//! ```
+//!
+//! This limitation is imposed because links in links are invalid according to
+//! HTML.
+//! Technically though, it is possible by using an [autolink][] in a link,
+//! but you definitely should not do that.
+//!
+//! ## Tokens
+//!
+//! * [`Link`][TokenType::Link]
+//! * [`Image`][TokenType::Image]
+//! * [`Label`][TokenType::Label]
+//! * [`LabelText`][TokenType::LabelText]
+//! * [`LabelEnd`][TokenType::LabelEnd]
+//! * [`LabelMarker`][TokenType::LabelMarker]
+//! * [`Resource`][TokenType::Resource]
+//! * [`ResourceMarker`][TokenType::ResourceMarker]
+//! * [`ResourceDestination`][TokenType::ResourceDestination]
+//! * [`ResourceDestinationLiteral`][TokenType::ResourceDestinationLiteral]
+//! * [`ResourceDestinationLiteralMarker`][TokenType::ResourceDestinationLiteralMarker]
+//! * [`ResourceDestinationRaw`][TokenType::ResourceDestinationRaw]
+//! * [`ResourceDestinationString`][TokenType::ResourceDestinationString]
+//! * [`ResourceTitle`][TokenType::ResourceTitle]
+//! * [`ResourceTitleMarker`][TokenType::ResourceTitleMarker]
+//! * [`ResourceTitleString`][TokenType::ResourceTitleString]
+//! * [`Reference`][TokenType::Reference]
+//! * [`ReferenceMarker`][TokenType::ReferenceMarker]
+//! * [`ReferenceString`][TokenType::ReferenceString]
+//! * [`Data`][TokenType::Data]
+//! * [`SpaceOrTab`][TokenType::SpaceOrTab]
+//! * [`LineEnding`][TokenType::LineEnding]
+//!
+//! ## References
+//!
+//! * [`label-end.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-end.js)
+//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
+//! * [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links)
+//! * [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images)
+//!
+//! [string]: crate::content::string
+//! [text]: crate::content::text
+//! [destination]: crate::construct::partial_destination
+//! [title]: crate::construct::partial_title
+//! [label]: crate::construct::partial_label
+//! [label_start_image]: crate::construct::label_start_image
+//! [label_start_link]: crate::construct::label_start_link
+//! [definition]: crate::construct::definition
+//! [autolink]: crate::construct::autolink
+//! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri
+//! [normalize_identifier]: crate::util::normalize_identifier
+//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
+//! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;
use crate::construct::{
@@ -16,14 +160,17 @@ use crate::util::{
span::{serialize, Span},
};
+/// State needed to parse label end.
#[derive(Debug)]
struct Info {
- /// To do.
+ /// Index into `label_start_stack` of the corresponding opening.
label_start_index: usize,
- /// To do.
+ /// The proposed `Media` that this seems to represent.
media: Media,
}
+/// Resolve media: turn correct label start (image, link) and label end
+/// into links and images, or turn them back into data.
#[allow(clippy::too_many_lines)]
pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
let mut left: Vec<LabelStart> = tokenizer.label_start_list_loose.drain(..).collect();
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 97231e0..7725334 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -7,11 +7,10 @@
//! label_start_image ::= '!' '['
//! ```
//!
-//! Label start (images) relates to the `<img>` element in HTML.
+//! Label start (image) does not, on its own, relate to anything in HTML.
+//! When matched with a [label end][label_end], they together relate to the
+//! `<img>` element in HTML.
//! See [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info.
-//!
-//! Whether it contributes an image depends on whether it is followed by a
-//! valid [label end][label_end] or not.
//! Without an end, the characters (`![`) are output.
//!
//! ## Tokens
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 677fb50..46d7c9c 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -7,11 +7,10 @@
//! label_start_link ::= '['
//! ```
//!
-//! Label start (link) relates to the `<a>` element in HTML.
+//! Label start (link) does not, on its own, relate to anything in HTML.
+//! When matched with a [label end][label_end], they together relate to the
+//! `<a>` element in HTML.
//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info.
-//!
-//! Whether it contributes a link depends on whether it is followed by a
-//! valid [label end][label_end] or not.
//! Without an end, the characters (`[`) are output.
//!
//! ## Tokens
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 05f5060..71e26df 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -1,4 +1,4 @@
-//! Destination occurs in [definition][] and label end.
+//! Destination occurs in [definition][] and [label end][label_end].
//!
//! They’re formed with the following BNF:
//!
@@ -68,9 +68,8 @@
//! [string]: crate::content::string
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
+//! [label_end]: crate::construct::label_end
//! [sanitize_uri]: crate::util::sanitize_uri
-//!
-//! <!-- To do: link label end. -->
use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};