//! Definition occurs in the [flow] content type. //! //! ## Grammar //! //! Definition forms with the following BNF //! (see [construct][crate::construct] for character groups): //! //! ```bnf //! definition ::= label ':' [ space_or_tab_eol ] destination [ space_or_tab_eol title ] [ space_or_tab ] //! //! ; See the `destination`, `title`, and `label` constructs for the BNF of //! ; those parts. //! ``` //! //! As this construct occurs in flow, like all flow constructs, it must be //! followed by an eol (line ending) or eof (end of file). //! //! See [`destination`][destination], [`label`][label], and [`title`][title] //! for grammar, notes, and recommendations on each part. //! //! The `destination`, `label`, and `title` parts are interpreted as the //! [string][] content type. //! That means that [character escapes][character_escape] and //! [character references][character_reference] are allowed. //! //! Definitions match to references through identifiers. //! To match, both labels must be equal after normalizing with //! [`normalize_identifier`][normalize_identifier]. //! One definition can match to multiple references. //! Multiple definitions with the same, normalized, identifier are ignored: the //! first definition is preferred. //! To illustrate, the definition with a destination of `x` wins: //! //! ```markdown //! [a]: x //! [a]: y //! //! [a] //! ``` //! //! Importantly, while labels *can* include [string][] content (character //! escapes and character references), these are not considered when matching. //! To illustrate, neither definition matches the reference: //! //! ```markdown //! [a&b]: x //! [a\&b]: y //! //! [a&b] //! ``` //! //! For info on how to encode characters in URLs, see //! [`destination`][destination]. //! For info on how characters are encoded as `href` on `` or `src` on //! `` when compiling, see //! [`sanitize_uri`][sanitize_uri]. //! //! ## HTML //! //! Definitions in markdown do not, on their own, relate to anything in HTML. //! When matched with a [label end (reference)][label_end], they together //! relate to the `` or `` elements in HTML. //! The definition forms its `href` or `src`, and optionally `title`, //! attributes. //! See [*§ 4.5.1 The `a` element*][html_a] and //! [*§ 4.8.3 The `img` element*][html_img] in the HTML spec for more info. //! //! ## Tokens //! //! * [`Definition`][Name::Definition] //! * [`DefinitionDestination`][Name::DefinitionDestination] //! * [`DefinitionDestinationLiteral`][Name::DefinitionDestinationLiteral] //! * [`DefinitionDestinationLiteralMarker`][Name::DefinitionDestinationLiteralMarker] //! * [`DefinitionDestinationRaw`][Name::DefinitionDestinationRaw] //! * [`DefinitionDestinationString`][Name::DefinitionDestinationString] //! * [`DefinitionLabel`][Name::DefinitionLabel] //! * [`DefinitionLabelMarker`][Name::DefinitionLabelMarker] //! * [`DefinitionLabelString`][Name::DefinitionLabelString] //! * [`DefinitionMarker`][Name::DefinitionMarker] //! * [`DefinitionTitle`][Name::DefinitionTitle] //! * [`DefinitionTitleMarker`][Name::DefinitionTitleMarker] //! * [`DefinitionTitleString`][Name::DefinitionTitleString] //! * [`LineEnding`][Name::LineEnding] //! * [`SpaceOrTab`][Name::SpaceOrTab] //! //! ## References //! //! * [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js) //! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions) //! //! [flow]: crate::construct::flow //! [string]: crate::construct::string //! [character_escape]: crate::construct::character_escape //! [character_reference]: crate::construct::character_reference //! [destination]: crate::construct::partial_destination //! [label]: crate::construct::partial_label //! [label_end]: crate::construct::label_end //! [title]: crate::construct::partial_title //! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri //! [normalize_identifier]: crate::util::normalize_identifier //! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element //! [html_img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::construct::partial_space_or_tab::space_or_tab; use crate::construct::partial_space_or_tab_eol::space_or_tab_eol; use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; use crate::util::{ normalize_identifier::normalize_identifier, skip, slice::{Position, Slice}, }; /// At start of a definition. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { // Do not interrupt paragraphs (but do follow definitions). if tokenizer.parse_state.constructs.definition && (!tokenizer.interrupt || (!tokenizer.events.is_empty() && tokenizer.events[skip::opt_back( &tokenizer.events, tokenizer.events.len() - 1, &[Name::LineEnding, Name::SpaceOrTab], )] .name == Name::Definition)) { tokenizer.enter(Name::Definition); if matches!(tokenizer.current, Some(b'\t' | b' ')) { // Note: arbitrary whitespace allowed even if code (indented) is on. tokenizer.attempt(State::Next(StateName::DefinitionBefore), State::Nok); State::Retry(space_or_tab(tokenizer)) } else { State::Retry(StateName::DefinitionBefore) } } else { State::Nok } } /// After optional whitespace, at `[`. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { tokenizer.tokenize_state.token_1 = Name::DefinitionLabel; tokenizer.tokenize_state.token_2 = Name::DefinitionLabelMarker; tokenizer.tokenize_state.token_3 = Name::DefinitionLabelString; tokenizer.attempt(State::Next(StateName::DefinitionLabelAfter), State::Nok); State::Retry(StateName::LabelStart) } _ => State::Nok, } } /// After label. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn label_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; tokenizer.tokenize_state.end = skip::to_back( &tokenizer.events, tokenizer.events.len() - 1, &[Name::DefinitionLabelString], ); match tokenizer.current { Some(b':') => { tokenizer.enter(Name::DefinitionMarker); tokenizer.consume(); tokenizer.exit(Name::DefinitionMarker); State::Next(StateName::DefinitionMarkerAfter) } _ => State::Nok, } } /// After marker. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn marker_after(tokenizer: &mut Tokenizer) -> State { if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) { tokenizer.attempt( State::Next(StateName::DefinitionDestinationBefore), State::Next(StateName::DefinitionDestinationBefore), ); State::Retry(space_or_tab_eol(tokenizer)) } else { State::Retry(StateName::DefinitionDestinationBefore) } } /// Before destination. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn destination_before(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::DefinitionDestination; tokenizer.tokenize_state.token_2 = Name::DefinitionDestinationLiteral; tokenizer.tokenize_state.token_3 = Name::DefinitionDestinationLiteralMarker; tokenizer.tokenize_state.token_4 = Name::DefinitionDestinationRaw; tokenizer.tokenize_state.token_5 = Name::DefinitionDestinationString; tokenizer.tokenize_state.size_b = usize::MAX; tokenizer.attempt( State::Next(StateName::DefinitionDestinationAfter), State::Next(StateName::DefinitionDestinationMissing), ); State::Retry(StateName::DestinationStart) } /// After destination. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn destination_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; tokenizer.tokenize_state.token_4 = Name::Data; tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; tokenizer.attempt( State::Next(StateName::DefinitionAfter), State::Next(StateName::DefinitionAfter), ); State::Retry(StateName::DefinitionTitleBefore) } /// Without destination. pub fn destination_missing(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; tokenizer.tokenize_state.token_4 = Name::Data; tokenizer.tokenize_state.token_5 = Name::Data; tokenizer.tokenize_state.size_b = 0; tokenizer.tokenize_state.end = 0; State::Nok } /// After definition. /// /// ```markdown /// > | [a]: b /// ^ /// > | [a]: b "c" /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt( State::Next(StateName::DefinitionAfterWhitespace), State::Nok, ); State::Retry(space_or_tab(tokenizer)) } else { State::Retry(StateName::DefinitionAfterWhitespace) } } /// After definition, after optional whitespace. /// /// ```markdown /// > | [a]: b /// ^ /// > | [a]: b "c" /// ^ /// ``` pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => { tokenizer.exit(Name::Definition); // Note: we don’t care about uniqueness. // It’s likely that that doesn’t happen very frequently. // It is more likely that it wastes precious time. tokenizer.tokenize_state.definitions.push( // Note: we don’t care about virtual spaces, so `as_str` is fine. normalize_identifier( Slice::from_position( tokenizer.parse_state.bytes, &Position::from_exit_event(&tokenizer.events, tokenizer.tokenize_state.end), ) .as_str(), ), ); tokenizer.tokenize_state.end = 0; // You’d be interrupting. tokenizer.interrupt = true; State::Ok } _ => { tokenizer.tokenize_state.end = 0; State::Nok } } } /// After destination, at whitespace. /// /// ```markdown /// > | [a]: b /// ^ /// > | [a]: b "c" /// ^ /// ``` pub fn title_before(tokenizer: &mut Tokenizer) -> State { if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) { tokenizer.attempt( State::Next(StateName::DefinitionTitleBeforeMarker), State::Nok, ); State::Retry(space_or_tab_eol(tokenizer)) } else { State::Nok } } /// At title. /// /// ```markdown /// | [a]: b /// > | "c" /// ^ /// ``` pub fn title_before_marker(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::DefinitionTitle; tokenizer.tokenize_state.token_2 = Name::DefinitionTitleMarker; tokenizer.tokenize_state.token_3 = Name::DefinitionTitleString; tokenizer.attempt(State::Next(StateName::DefinitionTitleAfter), State::Nok); State::Retry(StateName::TitleStart) } /// After title. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn title_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_1 = Name::Data; tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; if matches!(tokenizer.current, Some(b'\t' | b' ')) { tokenizer.attempt( State::Next(StateName::DefinitionTitleAfterOptionalWhitespace), State::Nok, ); State::Retry(space_or_tab(tokenizer)) } else { State::Retry(StateName::DefinitionTitleAfterOptionalWhitespace) } } /// After title, after optional whitespace. /// /// ```markdown /// > | [a]: b "c" /// ^ /// ``` pub fn title_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { None | Some(b'\n') => State::Ok, _ => State::Nok, } }