From a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 24 Jun 2022 17:57:10 +0200 Subject: Add link, images (resource) This is still some messy code that needs cleaning up, but it adds support for links and images, of the resource kind (`[a](b)`). References (`[a][b]`) are parsed and will soon be supported, but need matching. * Fix bug to pad percent-encoded bytes when normalizing urls * Fix bug with escapes counting as balancing in destination * Add `space_or_tab_one_line_ending`, to parse whitespace including up to one line ending (but not a blank line) * Add `ParserState` to share codes, definitions, etc --- src/construct/definition.rs | 136 ++----- src/construct/label_end.rs | 712 ++++++++++++++++++++++++++++++++++ src/construct/label_start_image.rs | 47 +++ src/construct/label_start_link.rs | 30 ++ src/construct/mod.rs | 11 +- src/construct/partial_destination.rs | 3 +- src/construct/partial_space_or_tab.rs | 39 ++ src/construct/partial_title.rs | 14 +- 8 files changed, 876 insertions(+), 116 deletions(-) create mode 100644 src/construct/label_end.rs create mode 100644 src/construct/label_start_image.rs create mode 100644 src/construct/label_start_link.rs (limited to 'src/construct') diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 92d275c..674bd65 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -115,7 +115,7 @@ use crate::construct::{ partial_destination::{start as destination, Options as DestinationOptions}, partial_label::{start as label, Options as LabelOptions}, - partial_space_or_tab::space_or_tab, + partial_space_or_tab::{space_or_tab, space_or_tab_one_line_ending}, partial_title::{start as title, Options as TitleOptions}, }; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -168,7 +168,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(TokenType::DefinitionMarker); ( 
State::Fn(Box::new( - tokenizer.attempt_opt(space_or_tab(), marker_after), + tokenizer.go(space_or_tab_one_line_ending(), destination_before), )), None, ) @@ -177,31 +177,6 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } } -/// After the marker, after whitespace. -/// -/// ```markdown -/// [a]: |b "c" -/// -/// [a]: |␊ -/// b "c" -/// ``` -fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { - tokenizer.enter(TokenType::LineEnding); - tokenizer.consume(code); - tokenizer.exit(TokenType::LineEnding); - ( - State::Fn(Box::new( - tokenizer.attempt_opt(space_or_tab(), destination_before), - )), - None, - ) - } - _ => destination_before(tokenizer, code), - } -} - /// Before a destination. /// /// ```markdown @@ -211,35 +186,23 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// |b "c" /// ``` fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - let event = tokenizer.events.last().unwrap(); - - // Whitespace. - if (event.token_type == TokenType::LineEnding || event.token_type == TokenType::SpaceOrTab) - // Blank line not ok. 
- && !matches!( - code, - Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') - ) { - tokenizer.go( - |t, c| { - destination( - t, - c, - DestinationOptions { - limit: usize::MAX, - destination: TokenType::DefinitionDestination, - literal: TokenType::DefinitionDestinationLiteral, - marker: TokenType::DefinitionDestinationLiteralMarker, - raw: TokenType::DefinitionDestinationRaw, - string: TokenType::DefinitionDestinationString, - }, - ) - }, - destination_after, - )(tokenizer, code) - } else { - (State::Nok, None) - } + tokenizer.go( + |t, c| { + destination( + t, + c, + DestinationOptions { + limit: usize::MAX, + destination: TokenType::DefinitionDestination, + literal: TokenType::DefinitionDestinationLiteral, + marker: TokenType::DefinitionDestinationLiteralMarker, + raw: TokenType::DefinitionDestinationRaw, + string: TokenType::DefinitionDestinationString, + }, + ) + }, + destination_after, + )(tokenizer, code) } /// After a destination. @@ -289,32 +252,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// "c" /// ``` fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.attempt_opt(space_or_tab(), title_before_after_optional_whitespace)(tokenizer, code) -} - -/// Before a title, after optional whitespace. -/// -/// ```markdown -/// [a]: b |"c" -/// -/// [a]: b |␊ -/// "c" -/// ``` -fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { - tokenizer.enter(TokenType::LineEnding); - tokenizer.consume(code); - tokenizer.exit(TokenType::LineEnding); - ( - State::Fn(Box::new( - tokenizer.attempt_opt(space_or_tab(), title_before_marker), - )), - None, - ) - } - _ => title_before_marker(tokenizer, code), - } + tokenizer.go(space_or_tab_one_line_ending(), title_before_marker)(tokenizer, code) } /// Before a title, after a line ending. 
@@ -324,26 +262,20 @@ fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code) /// | "c" /// ``` fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - let event = tokenizer.events.last().unwrap(); - - if event.token_type == TokenType::LineEnding || event.token_type == TokenType::SpaceOrTab { - tokenizer.go( - |t, c| { - title( - t, - c, - TitleOptions { - title: TokenType::DefinitionTitle, - marker: TokenType::DefinitionTitleMarker, - string: TokenType::DefinitionTitleString, - }, - ) - }, - title_after, - )(tokenizer, code) - } else { - (State::Nok, None) - } + tokenizer.go( + |t, c| { + title( + t, + c, + TitleOptions { + title: TokenType::DefinitionTitle, + marker: TokenType::DefinitionTitleMarker, + string: TokenType::DefinitionTitleString, + }, + ) + }, + title_after, + )(tokenizer, code) } /// After a title. diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs new file mode 100644 index 0000000..405858d --- /dev/null +++ b/src/construct/label_end.rs @@ -0,0 +1,712 @@ +//! To do + +use crate::constant::LINK_RESOURCE_DESTINATION_BALANCE_MAX; +use crate::construct::{ + partial_destination::{start as destination, Options as DestinationOptions}, + partial_label::{start as label, Options as LabelOptions}, + partial_space_or_tab::space_or_tab_one_line_ending, + partial_title::{start as title, Options as TitleOptions}, +}; +use crate::tokenizer::{ + Code, Event, EventType, LabelStart, Media, State, StateFnResult, TokenType, Tokenizer, +}; +use crate::util::{ + normalize_identifier::normalize_identifier, + span::{serialize, Span}, +}; +/// To do: could we do without `HashMap`, so we don’t need `std`? +use std::collections::HashMap; + +#[derive(Debug)] +struct Info { + /// To do. + label_start_index: usize, + /// To do. 
+ media: Media, +} + +#[allow(clippy::too_many_lines)] +pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> { + let mut left: Vec<LabelStart> = tokenizer.label_start_list_loose.drain(..).collect(); + let mut left_2: Vec<LabelStart> = tokenizer.label_start_stack.drain(..).collect(); + let media: Vec<Media> = tokenizer.media_list.drain(..).collect(); + left.append(&mut left_2); + + let mut map: HashMap<usize, (usize, Vec<Event>)> = HashMap::new(); + let events = &tokenizer.events; + + let mut index = 0; + while index < left.len() { + let label_start = &left[index]; + let data_enter_index = label_start.start.0; + let data_exit_index = label_start.start.1; + + map.insert( + data_enter_index, + ( + data_exit_index - data_enter_index, + vec![ + Event { + event_type: EventType::Enter, + token_type: TokenType::Data, + point: events[data_enter_index].point.clone(), + index: events[data_enter_index].index, + previous: None, + next: None, + }, + Event { + event_type: EventType::Exit, + token_type: TokenType::Data, + point: events[data_exit_index].point.clone(), + index: events[data_exit_index].index, + previous: None, + next: None, + }, + ], + ), + ); + + index += 1; + } + + let mut index = 0; + while index < media.len() { + let media = &media[index]; + // LabelLink:Enter or LabelImage:Enter. + let group_enter_index = media.start.0; + let group_enter_event = &events[group_enter_index]; + // LabelLink:Exit or LabelImage:Exit. + let text_enter_index = media.start.0 + + (if group_enter_event.token_type == TokenType::LabelLink { + 4 + } else { + 6 + }); + // LabelEnd:Enter. + let text_exit_index = media.end.0; + // LabelEnd:Exit. + let label_exit_index = media.end.0 + 3; + // Resource:Exit, etc. + let group_end_index = media.end.1; + + // Insert a group enter and label enter. 
+ add( + &mut map, + group_enter_index, + 0, + vec![ + Event { + event_type: EventType::Enter, + token_type: if group_enter_event.token_type == TokenType::LabelLink { + TokenType::Link + } else { + TokenType::Image + }, + point: group_enter_event.point.clone(), + index: group_enter_event.index, + previous: None, + next: None, + }, + Event { + event_type: EventType::Enter, + token_type: TokenType::Label, + point: group_enter_event.point.clone(), + index: group_enter_event.index, + previous: None, + next: None, + }, + ], + ); + + // Empty events not allowed. + if text_enter_index != text_exit_index { + // Insert a text enter. + add( + &mut map, + text_enter_index, + 0, + vec![Event { + event_type: EventType::Enter, + token_type: TokenType::LabelText, + point: events[text_enter_index].point.clone(), + index: events[text_enter_index].index, + previous: None, + next: None, + }], + ); + + // Insert a text exit. + add( + &mut map, + text_exit_index, + 0, + vec![Event { + event_type: EventType::Exit, + token_type: TokenType::LabelText, + point: events[text_exit_index].point.clone(), + index: events[text_exit_index].index, + previous: None, + next: None, + }], + ); + } + + // Insert a label exit. + add( + &mut map, + label_exit_index + 1, + 0, + vec![Event { + event_type: EventType::Exit, + token_type: TokenType::Label, + point: events[label_exit_index].point.clone(), + index: events[label_exit_index].index, + previous: None, + next: None, + }], + ); + + // Insert a group exit. 
+ add( + &mut map, + group_end_index + 1, + 0, + vec![Event { + event_type: EventType::Exit, + token_type: TokenType::Link, + point: events[group_end_index].point.clone(), + index: events[group_end_index].index, + previous: None, + next: None, + }], + ); + + index += 1; + } + + let mut indices: Vec<&usize> = map.keys().collect(); + indices.sort_unstable(); + let mut next_events: Vec<Event> = vec![]; + let mut index_into_indices = 0; + let mut start = 0; + let events = &mut tokenizer.events; + let mut shift: i32 = 0; + + while index_into_indices < indices.len() { + let index = *indices[index_into_indices]; + + if start < index { + let append = &mut events[start..index].to_vec(); + let mut index = 0; + + while index < append.len() { + let ev = &mut append[index]; + + if let Some(x) = ev.previous { + let next = (x as i32 + shift) as usize; + ev.previous = Some(next); + println!("todo: y: previous {:?} {:?} {:?}", x, shift, start); + } + + if let Some(x) = ev.next { + let next = (x as i32 + shift) as usize; + ev.next = Some(next); + println!("todo: y: next {:?} {:?} {:?}", x, shift, start); + } + + index += 1; + } + + next_events.append(append); + } + + let (remove, add) = map.get(&index).unwrap(); + shift += (add.len() as i32) - (*remove as i32); + + if !add.is_empty() { + let append = &mut add.clone(); + let mut index = 0; + + while index < append.len() { + let ev = &mut append[index]; + + if let Some(x) = ev.previous { + println!("todo: x: previous {:?} {:?} {:?}", x, shift, start); + } + + if let Some(x) = ev.next { + println!("todo: x: next {:?} {:?} {:?}", x, shift, start); + } + + index += 1; + } + + next_events.append(append); + } + + start = index + remove; + index_into_indices += 1; + } + + if start < events.len() { + next_events.append(&mut events[start..].to_vec()); + } + + next_events +} + +/// Start of label end. 
+/// +/// ```markdown +/// [a|](b) c +/// [a|][b] c +/// [a|][] b +/// [a|] b +/// +/// [a]: z +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if Code::Char(']') == code { + let mut label_start_index: Option<usize> = None; + let mut index = tokenizer.label_start_stack.len(); + + while index > 0 { + index -= 1; + + if !tokenizer.label_start_stack[index].balanced { + label_start_index = Some(index); + break; + } + } + + // If there is an okay opening: + if let Some(label_start_index) = label_start_index { + let label_start = tokenizer + .label_start_stack + .get_mut(label_start_index) + .unwrap(); + + // Mark as balanced if the info is inactive. + if label_start.inactive { + return nok(tokenizer, code, label_start_index); + } + + let label_end_start = tokenizer.events.len(); + let info = Info { + label_start_index, + media: Media { + start: label_start.start, + end: (label_end_start, label_end_start + 3), + id: normalize_identifier(&serialize( + &tokenizer.parse_state.codes, + &Span { + start_index: tokenizer.events[label_start.start.1].index, + end_index: tokenizer.events[label_end_start - 1].index, + }, + false, + )), + }, + }; + + tokenizer.enter(TokenType::LabelEnd); + tokenizer.enter(TokenType::LabelMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::LabelMarker); + tokenizer.exit(TokenType::LabelEnd); + + return (State::Fn(Box::new(move |t, c| after(t, c, info))), None); + } + } + + (State::Nok, None) +} + +/// After `]`. +/// +/// ```markdown +/// [a]|(b) c +/// [a]|[b] c +/// [a]|[] b +/// [a]| b +/// +/// [a]: z +/// ``` +fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + // let label_start = tokenizer + // .label_start_stack + // .get_mut(info.label_start_index) + // .unwrap(); + // To do: figure out if defined or not. + let defined = false; + println!("to do: is `{:?}` defined?", info); + match code { + // Resource (`[asd](fgh)`)? 
+ Code::Char('(') => tokenizer.attempt(resource, move |is_ok| { + Box::new(move |t, c| { + // Also fine if `defined`, as then it’s a valid shortcut. + if is_ok || defined { + ok(t, c, info) + } else { + nok(t, c, info.label_start_index) + } + }) + })(tokenizer, code), + // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? + Code::Char('[') => tokenizer.attempt(full_reference, move |is_ok| { + Box::new(move |t, c| { + if is_ok { + ok(t, c, info) + } else if defined { + reference_not_full(t, c, info) + } else { + nok(t, c, info.label_start_index) + } + }) + })(tokenizer, code), + // Shortcut reference: `[asd]`? + _ => { + if defined { + ok(tokenizer, code, info) + } else { + nok(tokenizer, code, info.label_start_index) + } + } + } +} + +/// After `]`, at `[`, but not at a full reference. +/// +/// > 👉 **Note**: we only get here if the label is defined. +/// +/// ```markdown +/// [a]|[] b +/// +/// [a]: z +/// ``` +fn reference_not_full(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + tokenizer.attempt(collapsed_reference, move |is_ok| { + Box::new(move |t, c| { + if is_ok { + ok(t, c, info) + } else { + nok(t, c, info.label_start_index) + } + }) + })(tokenizer, code) +} + +/// Done, we found something. +/// +/// ```markdown +/// [a](b)| c +/// [a][b]| c +/// [a][]| b +/// [a]| b +/// +/// [a]: z +/// ``` +fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { + println!( + "ok res, ref full, ref, collapsed, or ref shortcut: {:?}", + info.media + ); + // Remove this one and everything after it. + let mut left: Vec<LabelStart> = tokenizer + .label_start_stack + .drain(info.label_start_index..) + .collect(); + // Remove this one from `left`, as we’ll move it to `media_list`. 
+ left.remove(0); + tokenizer.label_start_list_loose.append(&mut left); + + let is_link = tokenizer.events[info.media.start.0].token_type == TokenType::LabelLink; + + if is_link { + let mut index = 0; + while index < tokenizer.label_start_stack.len() { + let label_start = &mut tokenizer.label_start_stack[index]; + if tokenizer.events[label_start.start.0].token_type == TokenType::LabelLink { + label_start.inactive = true; + } + index += 1; + } + } + + info.media.end.1 = tokenizer.events.len() - 1; + tokenizer.media_list.push(info.media); + tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + (State::Ok, Some(vec![code])) +} + +/// Done, it’s nothing. +/// +/// There was an okay opening, but we didn’t match anything. +/// +/// ```markdown +/// [a]|(b c +/// [a]|[b c +/// [b]|[ c +/// [b]| c +/// +/// [a]: z +/// ``` +fn nok(tokenizer: &mut Tokenizer, _code: Code, label_start_index: usize) -> StateFnResult { + let label_start = tokenizer + .label_start_stack + .get_mut(label_start_index) + .unwrap(); + println!("just balanced braces: {:?}", label_start); + label_start.balanced = true; + // To do: pop things off the list? + (State::Nok, None) +} + +/// Before a resource, at `(`. +/// +/// ```markdown +/// [a]|(b) c +/// ``` +fn resource(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('(') => { + tokenizer.enter(TokenType::Resource); + tokenizer.enter(TokenType::ResourceMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::ResourceMarker); + (State::Fn(Box::new(resource_start)), None) + } + _ => unreachable!("expected `(`"), + } +} + +/// At the start of a resource, after `(`, before a definition. +/// +/// ```markdown +/// [a](|b) c +/// ``` +fn resource_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_open)(tokenizer, code) +} + +/// At the start of a resource, after optional whitespace. 
+/// +/// ```markdown +/// [a](|b) c +/// ``` +fn resource_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char(')') => resource_end(tokenizer, code), + _ => tokenizer.go( + |t, c| { + destination( + t, + c, + DestinationOptions { + limit: LINK_RESOURCE_DESTINATION_BALANCE_MAX, + destination: TokenType::ResourceDestination, + literal: TokenType::ResourceDestinationLiteral, + marker: TokenType::ResourceDestinationLiteralMarker, + raw: TokenType::ResourceDestinationRaw, + string: TokenType::ResourceDestinationString, + }, + ) + }, + destination_after, + )(tokenizer, code), + } +} + +/// In a resource, after a destination, before optional whitespace. +/// +/// ```markdown +/// [a](b|) c +/// [a](b| "c") d +/// ``` +fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt(space_or_tab_one_line_ending(), |ok| { + Box::new(if ok { resource_between } else { resource_end }) + })(tokenizer, code) +} + +/// In a resource, after a destination, after whitespace. +/// +/// ```markdown +/// [a](b |) c +/// [a](b |"c") d +/// ``` +fn resource_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('"' | '\'' | '(') => tokenizer.go( + |t, c| { + title( + t, + c, + TitleOptions { + title: TokenType::ResourceTitle, + marker: TokenType::ResourceTitleMarker, + string: TokenType::ResourceTitleString, + }, + ) + }, + title_after, + )(tokenizer, code), + _ => resource_end(tokenizer, code), + } +} + +/// In a resource, after a title. +/// +/// ```markdown +/// [a](b "c"|) d +/// ``` +fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_end)(tokenizer, code) +} + +/// In a resource, at the `)`. 
+/// +/// ```markdown +/// [a](b|) c +/// [a](b |) c +/// [a](b "c"|) d +/// ``` +fn resource_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char(')') => { + tokenizer.enter(TokenType::ResourceMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::ResourceMarker); + tokenizer.exit(TokenType::Resource); + (State::Ok, None) + } + _ => (State::Nok, None), + } +} + +/// In a reference (full), at the `[`. +/// +/// ```markdown +/// [a]|[b] +/// ``` +fn full_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('[') => tokenizer.go( + |t, c| { + label( + t, + c, + LabelOptions { + label: TokenType::Reference, + marker: TokenType::ReferenceMarker, + string: TokenType::ReferenceString, + }, + ) + }, + full_reference_after, + )(tokenizer, code), + _ => unreachable!("expected `[`"), + } +} + +/// In a reference (full), after `]`. +/// +/// ```markdown +/// [a][b]| +/// ``` +fn full_reference_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let events = &tokenizer.events; + let mut index = events.len() - 1; + let mut start: Option<usize> = None; + let mut end: Option<usize> = None; + + while index > 0 { + index -= 1; + let event = &events[index]; + if event.token_type == TokenType::ReferenceString { + if event.event_type == EventType::Exit { + end = Some(event.index); + } else { + start = Some(event.index); + break; + } + } + } + + // Always found, otherwise we don’t get here. + let start = start.unwrap(); + let end = end.unwrap(); + + let id = normalize_identifier(&serialize( + &tokenizer.parse_state.codes, + &Span { + start_index: start, + end_index: end, + }, + false, + )); + println!("to do: is `{:?}` defined?", id); + let defined = false; + + if defined { + (State::Ok, Some(vec![code])) + } else { + (State::Nok, None) + } +} + +/// In a reference (collapsed), at the `[`. +/// +/// > 👉 **Note**: we only get here if the label is defined. 
+/// +/// ```markdown +/// [a]|[] +/// ``` +fn collapsed_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('[') => { + tokenizer.enter(TokenType::Reference); + tokenizer.enter(TokenType::ReferenceMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::ReferenceMarker); + (State::Fn(Box::new(collapsed_reference_open)), None) + } + _ => (State::Nok, None), + } +} + +/// In a reference (collapsed), at the `]`. +/// +/// > 👉 **Note**: we only get here if the label is defined. +/// +/// ```markdown +/// [a][|] +/// ``` +fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char(']') => { + tokenizer.enter(TokenType::ReferenceMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::ReferenceMarker); + tokenizer.exit(TokenType::Reference); + (State::Ok, None) + } + _ => (State::Nok, None), + } +} + +pub fn add( + map: &mut HashMap<usize, (usize, Vec<Event>)>, + index: usize, + mut remove: usize, + mut add: Vec<Event>, +) { + let curr = map.remove(&index); + + if let Some((curr_rm, mut curr_add)) = curr { + remove += curr_rm; + curr_add.append(&mut add); + add = curr_add; + } + + map.insert(index, (remove, add)); +} diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs new file mode 100644 index 0000000..2e96977 --- /dev/null +++ b/src/construct/label_start_image.rs @@ -0,0 +1,47 @@ +//! To do + +use super::label_end::resolve_media; +use crate::tokenizer::{Code, LabelStart, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of label (image) start. 
+/// +/// ```markdown +/// a |![ b +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('!') => { + tokenizer.enter(TokenType::LabelImage); + tokenizer.enter(TokenType::LabelImageMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::LabelImageMarker); + (State::Fn(Box::new(open)), None) + } + _ => (State::Nok, None), + } +} + +/// After `!`, before a `[`. +/// +/// ```markdown +/// a !|[ b +/// ``` +pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('[') => { + tokenizer.enter(TokenType::LabelMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::LabelMarker); + tokenizer.exit(TokenType::LabelImage); + let end = tokenizer.events.len() - 1; + tokenizer.label_start_stack.push(LabelStart { + start: (end - 5, end), + balanced: false, + inactive: false, + }); + tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + (State::Ok, None) + } + _ => (State::Nok, None), + } +} diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs new file mode 100644 index 0000000..35c9dcd --- /dev/null +++ b/src/construct/label_start_link.rs @@ -0,0 +1,30 @@ +//! To do + +use super::label_end::resolve_media; +use crate::tokenizer::{Code, LabelStart, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of label (link) start. 
+/// +/// ```markdown +/// a |[ b +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('[') => { + let start = tokenizer.events.len(); + tokenizer.enter(TokenType::LabelLink); + tokenizer.enter(TokenType::LabelMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::LabelMarker); + tokenizer.exit(TokenType::LabelLink); + tokenizer.label_start_stack.push(LabelStart { + start: (start, tokenizer.events.len() - 1), + balanced: false, + inactive: false, + }); + tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + (State::Ok, None) + } + _ => (State::Nok, None), + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 9e5da0e..8565b2f 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -30,9 +30,9 @@ //! * [heading (setext)][heading_setext] //! * [html (flow)][html_flow] //! * [html (text)][html_text] -//! * label end -//! * label start (image) -//! * label start (link) +//! * [label end][label_end] +//! * [label start (image)][label_start_image] +//! * [label start (link)][label_start_link] //! * list //! * [paragraph][] //! * [thematic break][thematic_break] @@ -59,8 +59,6 @@ //! They also contain references to character as defined by [char][], so for //! example `ascii_punctuation` refers to //! [`char::is_ascii_punctuation`][char::is_ascii_punctuation]. -//! -//! 
pub mod autolink; pub mod blank_line; @@ -76,6 +74,9 @@ pub mod heading_atx; pub mod heading_setext; pub mod html_flow; pub mod html_text; +pub mod label_end; +pub mod label_start_image; +pub mod label_start_link; pub mod paragraph; pub mod partial_data; pub mod partial_destination; diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index 03dcbee..7887a44 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -267,11 +267,10 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { /// ```markdown /// a\|)b /// ``` -fn raw_escape(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { +fn raw_escape(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::Char('(' | ')' | '\\') => { tokenizer.consume(code); - info.balance += 1; (State::Fn(Box::new(move |t, c| raw(t, c, info))), None) } _ => raw(tokenizer, code, info), diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index 024a4b2..43bdc53 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -35,6 +35,45 @@ pub fn space_or_tab() -> Box { space_or_tab_min_max(1, usize::MAX) } +pub fn space_or_tab_one_line_ending() -> Box { + Box::new(|tokenizer, code| { + tokenizer.attempt(space_or_tab(), move |ok| { + Box::new(move |tokenizer, code| match code { + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.enter(TokenType::LineEnding); + tokenizer.consume(code); + tokenizer.exit(TokenType::LineEnding); + ( + State::Fn(Box::new(tokenizer.attempt_opt( + space_or_tab(), + move |_t, code| { + if !matches!( + code, + Code::None + | Code::CarriageReturnLineFeed + | Code::Char('\r' | '\n') + ) { + (State::Ok, Some(vec![code])) + } else { + (State::Nok, None) + } + }, + ))), + None, + ) + } + _ => { + if ok { + (State::Ok, Some(vec![code])) + } else { + (State::Nok, None) + 
} + } + }) + })(tokenizer, code) + }) +} + /// Between `x` and `y` `space_or_tab` /// /// ```bnf diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 3e61788..78ae311 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -32,7 +32,7 @@ //! use crate::construct::partial_space_or_tab::space_or_tab; -use crate::subtokenize::link; +use crate::subtokenize::link_to; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; /// Configuration. @@ -109,7 +109,7 @@ impl Kind { #[derive(Debug)] struct Info { /// Whether we’ve seen our first `ChunkString`. - connect: bool, + connect_index: Option<usize>, /// Kind of title. kind: Kind, /// Configuration. @@ -125,9 +125,9 @@ struct Info { /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFnResult { match code { - Code::Char(char) if char == '(' || char == '"' || char == '\'' => { + Code::Char(char) if char == '"' || char == '\'' || char == '(' => { let info = Info { - connect: false, + connect_index: None, kind: Kind::from_char(char), options, }; @@ -184,11 +184,11 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes _ => { tokenizer.enter(TokenType::ChunkString); - if info.connect { + if let Some(connect_index) = info.connect_index { let index = tokenizer.events.len() - 1; - link(&mut tokenizer.events, index); + link_to(&mut tokenizer.events, connect_index, index); } else { - info.connect = true; + info.connect_index = Some(tokenizer.events.len() - 1); } title(tokenizer, code, info) -- cgit