a [b c d](#) e
//! ```
//!
//! This limitation is imposed because links in links are invalid according to
//! HTML.
//! Technically though, in markdown it is still possible to construct them by
//! using an [autolink][] in a link.
//! You definitely should not do that.
//!
//! ## Tokens
//!
//! * [`Data`][Token::Data]
//! * [`Image`][Token::Image]
//! * [`Label`][Token::Label]
//! * [`LabelEnd`][Token::LabelEnd]
//! * [`LabelMarker`][Token::LabelMarker]
//! * [`LabelText`][Token::LabelText]
//! * [`LineEnding`][Token::LineEnding]
//! * [`Link`][Token::Link]
//! * [`Reference`][Token::Reference]
//! * [`ReferenceMarker`][Token::ReferenceMarker]
//! * [`ReferenceString`][Token::ReferenceString]
//! * [`Resource`][Token::Resource]
//! * [`ResourceDestination`][Token::ResourceDestination]
//! * [`ResourceDestinationLiteral`][Token::ResourceDestinationLiteral]
//! * [`ResourceDestinationLiteralMarker`][Token::ResourceDestinationLiteralMarker]
//! * [`ResourceDestinationRaw`][Token::ResourceDestinationRaw]
//! * [`ResourceDestinationString`][Token::ResourceDestinationString]
//! * [`ResourceMarker`][Token::ResourceMarker]
//! * [`ResourceTitle`][Token::ResourceTitle]
//! * [`ResourceTitleMarker`][Token::ResourceTitleMarker]
//! * [`ResourceTitleString`][Token::ResourceTitleString]
//! * [`SpaceOrTab`][Token::SpaceOrTab]
//!
//! ## References
//!
//! * [`label-end.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-end.js)
//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
//! * [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links)
//! * [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images)
//!
//! [string]: crate::content::string
//! [text]: crate::content::text
//! [destination]: crate::construct::partial_destination
//! [title]: crate::construct::partial_title
//! [label]: crate::construct::partial_label
//!
[label_start_image]: crate::construct::label_start_image //! [label_start_link]: crate::construct::label_start_link //! [definition]: crate::construct::definition //! [autolink]: crate::construct::autolink //! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri //! [normalize_identifier]: crate::util::normalize_identifier::normalize_identifier //! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element //! [html-img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX; use crate::construct::partial_space_or_tab::space_or_tab_eol; use crate::state::{Name, State}; use crate::token::Token; use crate::tokenizer::{Event, EventType, Media, Tokenizer}; use crate::util::{ normalize_identifier::normalize_identifier, skip, slice::{Position, Slice}, }; /// Start of label end. /// /// ```markdown /// > | [a](b) c /// ^ /// > | [a][b] c /// ^ /// > | [a][] b /// ^ /// > | [a] b /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { if Some(b']') == tokenizer.current && tokenizer.parse_state.constructs.label_end { let mut label_start_index = None; let mut index = tokenizer.tokenize_state.label_start_stack.len(); while index > 0 { index -= 1; if !tokenizer.tokenize_state.label_start_stack[index].balanced { label_start_index = Some(index); break; } } // If there is an okay opening: if let Some(label_start_index) = label_start_index { let label_start = tokenizer .tokenize_state .label_start_stack .get_mut(label_start_index) .unwrap(); tokenizer.tokenize_state.start = label_start_index; tokenizer.tokenize_state.end = tokenizer.events.len(); // Mark as balanced if the info is inactive. 
if label_start.inactive { return State::Retry(Name::LabelEndNok); } tokenizer.enter(Token::LabelEnd); tokenizer.enter(Token::LabelMarker); tokenizer.consume(); tokenizer.exit(Token::LabelMarker); tokenizer.exit(Token::LabelEnd); return State::Next(Name::LabelEndAfter); } } State::Nok } /// After `]`. /// /// ```markdown /// > | [a](b) c /// ^ /// > | [a][b] c /// ^ /// > | [a][] b /// ^ /// > | [a] b /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { let start = &tokenizer.tokenize_state.label_start_stack[tokenizer.tokenize_state.start]; let defined = tokenizer .parse_state .definitions .contains(&normalize_identifier( // We don’t care about virtual spaces, so `indices` and `as_str` are fine. Slice::from_indices( tokenizer.parse_state.bytes, tokenizer.events[start.start.1].point.index, tokenizer.events[tokenizer.tokenize_state.end].point.index, ) .as_str(), )); match tokenizer.current { // Resource (`[asd](fgh)`)? Some(b'(') => tokenizer.attempt( Name::LabelEndResourceStart, State::Next(Name::LabelEndOk), State::Next(if defined { Name::LabelEndOk } else { Name::LabelEndNok }), ), // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? Some(b'[') => tokenizer.attempt( Name::LabelEndReferenceFull, State::Next(Name::LabelEndOk), State::Next(if defined { Name::LabelEndReferenceNotFull } else { Name::LabelEndNok }), ), // Shortcut (`[asd]`) reference? _ => State::Retry(if defined { Name::LabelEndOk } else { Name::LabelEndNok }), } } /// After `]`, at `[`, but not at a full reference. /// /// > 👉 **Note**: we only get here if the label is defined. /// /// ```markdown /// > | [a][] b /// ^ /// > | [a] b /// ^ /// ``` pub fn reference_not_full(tokenizer: &mut Tokenizer) -> State { tokenizer.attempt( Name::LabelEndReferenceCollapsed, State::Next(Name::LabelEndOk), State::Next(Name::LabelEndNok), ) } /// Done, we found something. 
///
/// Moves the matched label start into `media_list`, moves the label starts
/// after it into `label_start_list_loose`, and registers the media resolver.
///
/// ```markdown
/// > | [a](b) c
///           ^
/// > | [a][b] c
///           ^
/// > | [a][] b
///          ^
/// > | [a] b
///        ^
/// ```
pub fn ok(tokenizer: &mut Tokenizer) -> State {
    // Detach the matched label start and everything after it from the stack.
    let mut detached = tokenizer
        .tokenize_state
        .label_start_stack
        .split_off(tokenizer.tokenize_state.start);
    // The first detached entry is the match itself: it goes to `media_list`,
    // the remainder becomes loose.
    let matched = detached.remove(0);
    tokenizer
        .tokenize_state
        .label_start_list_loose
        .extend(detached);

    // When a link was matched, every link opening still on the stack is marked
    // inactive.
    if tokenizer.events[matched.start.0].token_type == Token::LabelLink {
        let events = &tokenizer.events;

        for earlier in &mut tokenizer.tokenize_state.label_start_stack {
            if events[earlier.start.0].token_type == Token::LabelLink {
                earlier.inactive = true;
            }
        }
    }

    let end = (tokenizer.tokenize_state.end, tokenizer.events.len() - 1);
    tokenizer.tokenize_state.media_list.push(Media {
        start: matched.start,
        end,
    });
    tokenizer.tokenize_state.start = 0;
    tokenizer.tokenize_state.end = 0;
    tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
    State::Ok
}

/// Done, it’s nothing.
///
/// There was an okay opening, but we didn’t match anything.
/// The opening is marked as balanced and the stored indices are reset.
///
/// ```markdown
/// > | [a](b c
///        ^
/// > | [a][b c
///        ^
/// > | [a] b
///        ^
/// ```
pub fn nok(tokenizer: &mut Tokenizer) -> State {
    let state = &mut tokenizer.tokenize_state;
    let opening = state.start;
    state.label_start_stack.get_mut(opening).unwrap().balanced = true;
    state.start = 0;
    state.end = 0;
    State::Nok
}

/// Before a resource, at `(`.
///
/// Opens the `Resource` token and consumes the `(` as a `ResourceMarker`.
///
/// ```markdown
/// > | [a](b) c
///        ^
/// ```
pub fn resource_start(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.current != Some(b'(') {
        unreachable!("expected `(`");
    }

    tokenizer.enter(Token::Resource);
    tokenizer.enter(Token::ResourceMarker);
    tokenizer.consume();
    tokenizer.exit(Token::ResourceMarker);
    State::Next(Name::LabelEndResourceBefore)
}

/// At the start of a resource, after `(`, before a destination.
///
/// ```markdown
/// > | [a](b) c
///         ^
/// ```
pub fn resource_before(tokenizer: &mut Tokenizer) -> State {
    let whitespace = space_or_tab_eol(tokenizer);
    // Both outcomes of the attempt continue at `LabelEndResourceOpen`.
    let on_match = State::Next(Name::LabelEndResourceOpen);
    let on_miss = State::Next(Name::LabelEndResourceOpen);
    tokenizer.attempt(whitespace, on_match, on_miss)
}

/// At the start of a resource, after optional whitespace.
///
/// ```markdown
/// > | [a](b) c
///         ^
/// ```
pub fn resource_open(tokenizer: &mut Tokenizer) -> State {
    // At `)` there is no destination to parse: go straight to the end.
    if tokenizer.current == Some(b')') {
        return State::Retry(Name::LabelEndResourceEnd);
    }

    // Configure the token names and balance limit used for the destination.
    let state = &mut tokenizer.tokenize_state;
    state.token_1 = Token::ResourceDestination;
    state.token_2 = Token::ResourceDestinationLiteral;
    state.token_3 = Token::ResourceDestinationLiteralMarker;
    state.token_4 = Token::ResourceDestinationRaw;
    state.token_5 = Token::ResourceDestinationString;
    state.size_b = RESOURCE_DESTINATION_BALANCE_MAX;

    tokenizer.attempt(
        Name::DestinationStart,
        State::Next(Name::LabelEndResourceDestinationAfter),
        State::Next(Name::LabelEndResourceDestinationMissing),
    )
}

/// In a resource, after a destination, before optional whitespace.
///
/// Resets the destination token names and `size_b`, then attempts whitespace.
///
/// ```markdown
/// > | [a](b) c
///          ^
/// ```
pub fn resource_destination_after(tokenizer: &mut Tokenizer) -> State {
    let state = &mut tokenizer.tokenize_state;
    state.token_1 = Token::Data;
    state.token_2 = Token::Data;
    state.token_3 = Token::Data;
    state.token_4 = Token::Data;
    state.token_5 = Token::Data;
    state.size_b = 0;

    let whitespace = space_or_tab_eol(tokenizer);
    let on_match = State::Next(Name::LabelEndResourceBetween);
    let on_miss = State::Next(Name::LabelEndResourceEnd);
    tokenizer.attempt(whitespace, on_match, on_miss)
}

/// Without destination.
///
/// Resets the destination token names and `size_b`, then fails.
pub fn resource_destination_missing(tokenizer: &mut Tokenizer) -> State {
    let state = &mut tokenizer.tokenize_state;
    state.token_1 = Token::Data;
    state.token_2 = Token::Data;
    state.token_3 = Token::Data;
    state.token_4 = Token::Data;
    state.token_5 = Token::Data;
    state.size_b = 0;
    State::Nok
}

/// In a resource, after a destination, after whitespace.
///
/// ```markdown
/// > | [a](b ) c
///           ^
/// ```
pub fn resource_between(tokenizer: &mut Tokenizer) -> State {
    // Anything that cannot open a title must be the end of the resource.
    if !matches!(tokenizer.current, Some(b'"' | b'\'' | b'(')) {
        return State::Retry(Name::LabelEndResourceEnd);
    }

    let state = &mut tokenizer.tokenize_state;
    state.token_1 = Token::ResourceTitle;
    state.token_2 = Token::ResourceTitleMarker;
    state.token_3 = Token::ResourceTitleString;

    tokenizer.attempt(
        Name::TitleStart,
        State::Next(Name::LabelEndResourceTitleAfter),
        State::Nok,
    )
}

/// In a resource, after a title.
///
/// ```markdown
/// > | [a](b "c") d
///              ^
/// ```
pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {
    let state = &mut tokenizer.tokenize_state;
    state.token_1 = Token::Data;
    state.token_2 = Token::Data;
    state.token_3 = Token::Data;

    let whitespace = space_or_tab_eol(tokenizer);
    // Both outcomes of the attempt continue at `LabelEndResourceEnd`.
    let on_match = State::Next(Name::LabelEndResourceEnd);
    let on_miss = State::Next(Name::LabelEndResourceEnd);
    tokenizer.attempt(whitespace, on_match, on_miss)
}

/// In a resource, at the `)`.
///
/// Consumes the `)` as a `ResourceMarker` and closes the `Resource` token;
/// fails on anything else.
///
/// ```markdown
/// > | [a](b) d
///          ^
/// ```
pub fn resource_end(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.current != Some(b')') {
        return State::Nok;
    }

    tokenizer.enter(Token::ResourceMarker);
    tokenizer.consume();
    tokenizer.exit(Token::ResourceMarker);
    tokenizer.exit(Token::Resource);
    State::Ok
}

/// In a reference (full), at the `[`.
///
/// ```markdown
/// > | [a][b] d
///        ^
/// ```
pub fn reference_full(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.current != Some(b'[') {
        unreachable!("expected `[`");
    }

    // Configure the token names used for the reference label.
    let state = &mut tokenizer.tokenize_state;
    state.token_1 = Token::Reference;
    state.token_2 = Token::ReferenceMarker;
    state.token_3 = Token::ReferenceString;

    tokenizer.attempt(
        Name::LabelStart,
        State::Next(Name::LabelEndReferenceFullAfter),
        State::Nok,
    )
}

/// In a reference (full), after `]`.
///
/// Succeeds only when the normalized reference string is a known definition.
///
/// ```markdown
/// > | [a][b] d
///           ^
/// ```
pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State {
    let state = &mut tokenizer.tokenize_state;
    state.token_1 = Token::Data;
    state.token_2 = Token::Data;
    state.token_3 = Token::Data;

    // Skip back from the last event over `ReferenceString` and take the
    // position of the event found there.
    let last_index = tokenizer.events.len() - 1;
    let exit_index = skip::to_back(&tokenizer.events, last_index, &[Token::ReferenceString]);
    let position = Position::from_exit_event(&tokenizer.events, exit_index);

    // We don’t care about virtual spaces, so `as_str` is fine.
    let identifier =
        normalize_identifier(Slice::from_position(tokenizer.parse_state.bytes, &position).as_str());

    if tokenizer.parse_state.definitions.contains(&identifier) {
        State::Ok
    } else {
        State::Nok
    }
}

/// In a reference (collapsed), at the `[`.
///
/// > 👉 **Note**: we only get here if the label is defined.
/// /// ```markdown /// > | [a][] d /// ^ /// ``` pub fn reference_collapsed(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b'[') => { tokenizer.enter(Token::Reference); tokenizer.enter(Token::ReferenceMarker); tokenizer.consume(); tokenizer.exit(Token::ReferenceMarker); State::Next(Name::LabelEndReferenceCollapsedOpen) } _ => State::Nok, } } /// In a reference (collapsed), at the `]`. /// /// > 👉 **Note**: we only get here if the label is defined. /// /// ```markdown /// > | [a][] d /// ^ /// ``` pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { Some(b']') => { tokenizer.enter(Token::ReferenceMarker); tokenizer.consume(); tokenizer.exit(Token::ReferenceMarker); tokenizer.exit(Token::Reference); State::Ok } _ => State::Nok, } } /// Resolve media. /// /// This turns correct label start (image, link) and label end into links and /// images, or turns them back into data. #[allow(clippy::too_many_lines)] pub fn resolve_media(tokenizer: &mut Tokenizer) { let mut left = tokenizer.tokenize_state.label_start_list_loose.split_off(0); let mut left_2 = tokenizer.tokenize_state.label_start_stack.split_off(0); let media = tokenizer.tokenize_state.media_list.split_off(0); left.append(&mut left_2); let events = &tokenizer.events; // Remove loose label starts. let mut index = 0; while index < left.len() { let label_start = &left[index]; let data_enter_index = label_start.start.0; let data_exit_index = label_start.start.1; tokenizer.map.add( data_enter_index, data_exit_index - data_enter_index + 1, vec![ Event { event_type: EventType::Enter, token_type: Token::Data, point: events[data_enter_index].point.clone(), link: None, }, Event { event_type: EventType::Exit, token_type: Token::Data, point: events[data_exit_index].point.clone(), link: None, }, ], ); index += 1; } // Add grouping events. let mut index = 0; while index < media.len() { let media = &media[index]; // LabelLink:Enter or LabelImage:Enter. 
let group_enter_index = media.start.0; let group_enter_event = &events[group_enter_index]; // LabelLink:Exit or LabelImage:Exit. let text_enter_index = media.start.0 + (if group_enter_event.token_type == Token::LabelLink { 4 } else { 6 }); // LabelEnd:Enter. let text_exit_index = media.end.0; // LabelEnd:Exit. let label_exit_index = media.end.0 + 3; // Resource:Exit, etc. let group_end_index = media.end.1; // Insert a group enter and label enter. tokenizer.map.add( group_enter_index, 0, vec![ Event { event_type: EventType::Enter, token_type: if group_enter_event.token_type == Token::LabelLink { Token::Link } else { Token::Image }, point: group_enter_event.point.clone(), link: None, }, Event { event_type: EventType::Enter, token_type: Token::Label, point: group_enter_event.point.clone(), link: None, }, ], ); // Empty events not allowed. if text_enter_index != text_exit_index { // Insert a text enter. tokenizer.map.add( text_enter_index, 0, vec![Event { event_type: EventType::Enter, token_type: Token::LabelText, point: events[text_enter_index].point.clone(), link: None, }], ); // Insert a text exit. tokenizer.map.add( text_exit_index, 0, vec![Event { event_type: EventType::Exit, token_type: Token::LabelText, point: events[text_exit_index].point.clone(), link: None, }], ); } // Insert a label exit. tokenizer.map.add( label_exit_index + 1, 0, vec![Event { event_type: EventType::Exit, token_type: Token::Label, point: events[label_exit_index].point.clone(), link: None, }], ); // Insert a group exit. tokenizer.map.add( group_end_index + 1, 0, vec![Event { event_type: EventType::Exit, token_type: if group_enter_event.token_type == Token::LabelLink { Token::Link } else { Token::Image }, point: events[group_end_index].point.clone(), link: None, }], ); index += 1; } tokenizer.map.consume(&mut tokenizer.events); }