//! Definition is a construct that occurs in the [flow] content type.
//!
//! Definitions are formed with the following BNF:
//!
//! ```bnf
//! definition ::= label ':' [ whitespace ] destination [ whitespace title ] [ space_or_tab ]
//!
//! ; See the `destination`, `title`, and `label` constructs for the BNF of
//! ; those parts.
//! ```
//!
//! See [`destination`][destination], [`label`][label], and [`title`][title]
//! for grammar, notes, and recommendations.
//!
//! Definitions in markdown do not, on their own, relate to anything in HTML.
//! When matched with a [label end (reference)][label_end], they together
//! relate to the `<a>` or `<img>` elements in HTML.
//! The definition forms its `href` or `src`, and optionally `title`,
//! attributes.
//! See [*§ 4.5.1 The `a` element*][html-a] and
//! [*§ 4.8.3 The `img` element*][html-img] in the HTML spec for more info.
//!
//! The `destination`, `label`, and `title` parts are interpreted as the
//! [string][] content type.
//! That means that [character escapes][character_escape] and
//! [character references][character_reference] are allowed.
//!
//! Definitions match to references through their label.
//! To match, both labels must be equal after normalizing with
//! [`normalize_identifier`][normalize_identifier].
//! One definition can match to multiple references.
//! When multiple definitions share the same normalized identifier, all but
//! the first are ignored: the first definition is preferred.
//! To illustrate, the definition with a destination of `x` wins:
//!
//! ```markdown
//! [a]: x
//! [a]: y
//!
//! [a]
//! ```
//!
//! Importantly, while labels *can* include [string][] content (character
//! escapes and character references), these are not considered when matching.
//! To illustrate, neither definition matches the reference:
//!
//! ```markdown
//! [a&amp;b]: x
//! [a\&b]: y
//!
//! [a&b]
//! ```
//!
//! For info on how to encode characters in URLs, see
//! [`destination`][destination].
//! For info on how characters are encoded as `href` on `<a>` or `src` on
//! `<img>` when compiling, see
//! [`sanitize_uri`][sanitize_uri].
//!
//! ## Tokens
//!
//! * [`Definition`][Token::Definition]
//! * [`DefinitionDestination`][Token::DefinitionDestination]
//! * [`DefinitionDestinationLiteral`][Token::DefinitionDestinationLiteral]
//! * [`DefinitionDestinationLiteralMarker`][Token::DefinitionDestinationLiteralMarker]
//! * [`DefinitionDestinationRaw`][Token::DefinitionDestinationRaw]
//! * [`DefinitionDestinationString`][Token::DefinitionDestinationString]
//! * [`DefinitionLabel`][Token::DefinitionLabel]
//! * [`DefinitionLabelMarker`][Token::DefinitionLabelMarker]
//! * [`DefinitionLabelString`][Token::DefinitionLabelString]
//! * [`DefinitionMarker`][Token::DefinitionMarker]
//! * [`DefinitionTitle`][Token::DefinitionTitle]
//! * [`DefinitionTitleMarker`][Token::DefinitionTitleMarker]
//! * [`DefinitionTitleString`][Token::DefinitionTitleString]
//! * [`LineEnding`][Token::LineEnding]
//! * [`SpaceOrTab`][Token::SpaceOrTab]
//!
//! ## References
//!
//! * [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js)
//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
//!
//! [flow]: crate::content::flow
//! [string]: crate::content::string
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [label_end]: crate::construct::label_end
//! [destination]: crate::construct::partial_destination
//! [title]: crate::construct::partial_title
//! [label]: crate::construct::partial_label
//! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri
//! [normalize_identifier]: crate::util::normalize_identifier
//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
[html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::construct::{ partial_destination::{start as destination, Options as DestinationOptions}, partial_label::{start as label, Options as LabelOptions}, partial_space_or_tab::{space_or_tab, space_or_tab_eol}, partial_title::{start as title, Options as TitleOptions}, }; use crate::token::Token; use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; use crate::util::skip::opt_back as skip_opt_back; /// At the start of a definition. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let definition_before = !tokenizer.events.is_empty() && tokenizer.events[skip_opt_back( &tokenizer.events, tokenizer.events.len() - 1, &[Token::LineEnding, Token::SpaceOrTab], )] .token_type == Token::Definition; // Do not interrupt paragraphs (but do follow definitions). if (!tokenizer.interrupt || definition_before) && tokenizer.parse_state.constructs.definition { tokenizer.enter(Token::Definition); // Note: arbitrary whitespace allowed even if code (indented) is on. tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) } else { (State::Nok, 0) } } /// At the start of a definition, after whitespace. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('[') => tokenizer.go( |t, c| { label( t, c, LabelOptions { label: Token::DefinitionLabel, marker: Token::DefinitionLabelMarker, string: Token::DefinitionLabelString, }, ) }, label_after, )(tokenizer, code), _ => (State::Nok, 0), } } /// After the label of a definition. 
///
/// ```markdown
/// > | [a]: b "c"
///        ^
/// ```
///
/// Requires a `:`, which is consumed as a
/// [`DefinitionMarker`][Token::DefinitionMarker]; anything else is
/// `State::Nok`.
/// The returned state skips optional whitespace (possibly including a line
/// ending) and then moves on to `destination_before`.
fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
    if !matches!(code, Code::Char(':')) {
        return (State::Nok, 0);
    }

    tokenizer.enter(Token::DefinitionMarker);
    tokenizer.consume(code);
    tokenizer.exit(Token::DefinitionMarker);

    let next = tokenizer.attempt_opt(space_or_tab_eol(), destination_before);
    (State::Fn(Box::new(next)), 0)
}

/// At the start of the destination.
///
/// ```markdown
/// > | [a]: b "c"
///          ^
/// ```
///
/// Delegates to the shared `destination` construct, configured with the
/// definition-specific token types and no nesting limit (`usize::MAX`), and
/// continues in `destination_after` when it succeeds.
fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
    let parse_destination = tokenizer.go(
        |tokenizer, code| {
            destination(
                tokenizer,
                code,
                DestinationOptions {
                    limit: usize::MAX,
                    destination: Token::DefinitionDestination,
                    literal: Token::DefinitionDestinationLiteral,
                    marker: Token::DefinitionDestinationLiteralMarker,
                    raw: Token::DefinitionDestinationRaw,
                    string: Token::DefinitionDestinationString,
                },
            )
        },
        destination_after,
    );
    parse_destination(tokenizer, code)
}

/// Directly after the destination.
///
/// ```markdown
/// > | [a]: b "c"
///           ^
/// ```
///
/// The title is optional: `title_before` is attempted, and `after` runs
/// next regardless of the outcome.
fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
    let try_title = tokenizer.attempt_opt(title_before, after);
    try_title(tokenizer, code)
}

/// At the end of a definition, before any trailing whitespace.
///
/// ```markdown
/// > | [a]: b
///           ^
/// > | [a]: b "c"
///               ^
/// ```
///
/// Skips optional spaces and tabs, then continues in `after_whitespace`.
fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
    let skip_trailing = tokenizer.attempt_opt(space_or_tab(), after_whitespace);
    skip_trailing(tokenizer, code)
}

/// At the end of a definition, once trailing whitespace has been skipped.
///
/// ```markdown
/// > | [a]: b
///           ^
/// > | [a]: b "c"
///               ^
/// ```
///
/// The definition is only valid when it is followed by the end of input or a
/// line ending; in that case the [`Definition`][Token::Definition] token is
/// closed and `tokenizer.interrupt` is set.
/// The second element of the result is `0` at the end of input and `1` at a
/// line ending.
fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
    let at_eof = matches!(code, Code::None);
    let at_eol = matches!(
        code,
        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
    );

    if !(at_eof || at_eol) {
        return (State::Nok, 0);
    }

    tokenizer.exit(Token::Definition);
    // Whatever comes next would be interrupting (see the check in `start`).
    tokenizer.interrupt = true;
    (State::Ok, if at_eof { 0 } else { 1 })
}

/// After a destination, presumably before a title.
/// /// ```markdown /// > | [a]: b /// ^ /// > | [a]: b "c" /// ^ /// ``` fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.go(space_or_tab_eol(), title_before_marker)(tokenizer, code) } /// Before a title, after a line ending. /// /// ```markdown /// | [a]: b /// > | "c" /// ^ /// ``` fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.go( |t, c| { title( t, c, TitleOptions { title: Token::DefinitionTitle, marker: Token::DefinitionTitleMarker, string: Token::DefinitionTitleString, }, ) }, title_after, )(tokenizer, code) } /// After a title. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.attempt_opt(space_or_tab(), title_after_after_optional_whitespace)(tokenizer, code) } /// After a title, after optional whitespace. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` fn title_after_after_optional_whitespace(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { (State::Ok, if matches!(code, Code::None) { 0 } else { 1 }) } _ => (State::Nok, 0), } }