author     Titus Wormer <tituswormer@gmail.com>   2022-06-21 18:06:47 +0200
committer  Titus Wormer <tituswormer@gmail.com>   2022-06-21 18:06:47 +0200
commit     d1063268e62633a334bb9464f7cba2385da5812d (patch)
tree       76035cdf260046cd2afe1841c0351b311ea66d99 /src/construct
parent     56ff5c73c7ec19b349e7d60d04ce1057c006d6ec (diff)
Add support for passing token types to destination, label, title
Diffstat
-rw-r--r--  src/construct/definition.rs          |  53
-rw-r--r--  src/construct/partial_destination.rs | 135
-rw-r--r--  src/construct/partial_label.rs       |  44
-rw-r--r--  src/construct/partial_title.rs       | 102
4 files changed, 225 insertions, 109 deletions
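
In short: the partial_destination, partial_label, and partial_title helpers no longer hard-code the Definition* token types; callers now pass an Options struct naming the tokens to emit. A minimal sketch of the new call shape, lifted from the definition.rs hunk below (not a standalone program):

    // Call site as it appears in definition.rs after this change: wrap the
    // partial's `start` state in a closure and pass the token types this
    // construct wants the partial to emit.
    tokenizer.go(
        |t, c| {
            title(
                t,
                c,
                TitleOptions {
                    title: TokenType::DefinitionTitle,
                    marker: TokenType::DefinitionTitleMarker,
                    string: TokenType::DefinitionTitleString,
                },
            )
        },
        title_after,
    )(tokenizer, code)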
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 61c4d34..69d8d9b 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -57,8 +57,10 @@
 //! <!-- To do: describe how references and definitions match -->
 
 use crate::construct::{
-    partial_destination::start as destination, partial_label::start as label,
-    partial_space_or_tab::space_or_tab_opt, partial_title::start as title,
+    partial_destination::{start as destination, Options as DestinationOptions},
+    partial_label::{start as label, Options as LabelOptions},
+    partial_space_or_tab::space_or_tab_opt,
+    partial_title::{start as title, Options as TitleOptions},
 };
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -79,7 +81,20 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```
 pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
-        Code::Char('[') => tokenizer.go(label, label_after)(tokenizer, code),
+        Code::Char('[') => tokenizer.go(
+            |t, c| {
+                label(
+                    t,
+                    c,
+                    LabelOptions {
+                        label: TokenType::DefinitionLabel,
+                        marker: TokenType::DefinitionLabelMarker,
+                        string: TokenType::DefinitionLabelString,
+                    },
+                )
+            },
+            label_after,
+        )(tokenizer, code),
         _ => (State::Nok, None),
     }
 }
@@ -152,7 +167,22 @@ fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         code,
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
     ) {
-        tokenizer.go(destination, destination_after)(tokenizer, code)
+        tokenizer.go(
+            |t, c| {
+                destination(
+                    t,
+                    c,
+                    DestinationOptions {
+                        destination: TokenType::DefinitionDestination,
+                        literal: TokenType::DefinitionDestinationLiteral,
+                        marker: TokenType::DefinitionDestinationLiteralMarker,
+                        raw: TokenType::DefinitionDestinationRaw,
+                        string: TokenType::DefinitionDestinationString,
+                    },
+                )
+            },
+            destination_after,
+        )(tokenizer, code)
     } else {
         (State::Nok, None)
     }
@@ -243,7 +273,20 @@ fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     let event = tokenizer.events.last().unwrap();
 
     if event.token_type == TokenType::LineEnding || event.token_type == TokenType::Whitespace {
-        tokenizer.go(title, title_after)(tokenizer, code)
+        tokenizer.go(
+            |t, c| {
+                title(
+                    t,
+                    c,
+                    TitleOptions {
+                        title: TokenType::DefinitionTitle,
+                        marker: TokenType::DefinitionTitleMarker,
+                        string: TokenType::DefinitionTitleString,
+                    },
+                )
+            },
+            title_after,
+        )(tokenizer, code)
     } else {
         (State::Nok, None)
     }
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 82e83fe..823ce99 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -40,36 +40,68 @@
 //!
 //! <!-- To do: link label end. -->
 
-// To do: pass token types in.
-
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
+/// Configuration.
+///
+/// You must pass the token types in that are used.
+#[derive(Debug)]
+pub struct Options {
+    /// Token for the whole destination.
+    pub destination: TokenType,
+    /// Token for a literal (enclosed) destination.
+    pub literal: TokenType,
+    /// Token for a literal marker.
+    pub marker: TokenType,
+    /// Token for a raw destination.
+    pub raw: TokenType,
+    /// Token for a the string.
+    pub string: TokenType,
+}
+
+/// State needed to parse destination.
+#[derive(Debug)]
+struct Info {
+    /// Paren balance (used in raw).
+    balance: usize,
+    /// Configuration.
+    options: Options,
+}
+
 /// Before a destination.
 ///
 /// ```markdown
 /// |<ab>
 /// |ab
 /// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFnResult {
+    let info = Info {
+        balance: 0,
+        options,
+    };
+
     match code {
         Code::Char('<') => {
-            tokenizer.enter(TokenType::DefinitionDestination);
-            tokenizer.enter(TokenType::DefinitionDestinationLiteral);
-            tokenizer.enter(TokenType::DefinitionDestinationLiteralMarker);
+            tokenizer.enter(info.options.destination.clone());
+            tokenizer.enter(info.options.literal.clone());
+            tokenizer.enter(info.options.marker.clone());
             tokenizer.consume(code);
-            tokenizer.exit(TokenType::DefinitionDestinationLiteralMarker);
-            (State::Fn(Box::new(enclosed_before)), None)
+            tokenizer.exit(info.options.marker.clone());
+            (
+                State::Fn(Box::new(|t, c| enclosed_before(t, c, info))),
+                None,
+            )
         }
         Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(' ' | ')') => {
            (State::Nok, None)
         }
         Code::Char(char) if char.is_ascii_control() => (State::Nok, None),
         Code::Char(_) => {
-            tokenizer.enter(TokenType::DefinitionDestination);
-            tokenizer.enter(TokenType::DefinitionDestinationRaw);
-            tokenizer.enter(TokenType::DefinitionDestinationString);
+            tokenizer.enter(info.options.destination.clone());
+            tokenizer.enter(info.options.raw.clone());
+            tokenizer.enter(info.options.string.clone());
             tokenizer.enter(TokenType::ChunkString);
-            raw(tokenizer, code, 0)
+            raw(tokenizer, code, info)
         }
     }
 }
@@ -79,18 +111,18 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```markdown
 /// <|ab>
 /// ```
-fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn enclosed_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     if let Code::Char('>') = code {
-        tokenizer.enter(TokenType::DefinitionDestinationLiteralMarker);
+        tokenizer.enter(info.options.marker.clone());
         tokenizer.consume(code);
-        tokenizer.exit(TokenType::DefinitionDestinationLiteralMarker);
-        tokenizer.exit(TokenType::DefinitionDestinationLiteral);
-        tokenizer.exit(TokenType::DefinitionDestination);
+        tokenizer.exit(info.options.marker.clone());
+        tokenizer.exit(info.options.literal.clone());
+        tokenizer.exit(info.options.destination);
         (State::Ok, None)
     } else {
-        tokenizer.enter(TokenType::DefinitionDestinationString);
+        tokenizer.enter(info.options.string.clone());
         tokenizer.enter(TokenType::ChunkString);
-        enclosed(tokenizer, code)
+        enclosed(tokenizer, code, info)
     }
 }
@@ -99,23 +131,26 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```markdown
 /// <u|rl>
 /// ```
-fn enclosed(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn enclosed(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
         Code::Char('>') => {
             tokenizer.exit(TokenType::ChunkString);
-            tokenizer.exit(TokenType::DefinitionDestinationString);
-            enclosed_before(tokenizer, code)
+            tokenizer.exit(info.options.string.clone());
+            enclosed_before(tokenizer, code, info)
         }
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '<') => {
             (State::Nok, None)
         }
         Code::Char('\\') => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(enclosed_escape)), None)
+            (
+                State::Fn(Box::new(|t, c| enclosed_escape(t, c, info))),
+                None,
+            )
         }
         _ => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(enclosed)), None)
+            (State::Fn(Box::new(|t, c| enclosed(t, c, info))), None)
         }
     }
 }
@@ -125,13 +160,13 @@ fn enclosed(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```markdown
 /// <a\|>b>
 /// ```
-fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
         Code::Char('<' | '>' | '\\') => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(enclosed)), None)
+            (State::Fn(Box::new(|t, c| enclosed(t, c, info))), None)
         }
-        _ => enclosed(tokenizer, code),
+        _ => enclosed(tokenizer, code, info),
     }
 }
 
@@ -140,48 +175,44 @@ fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```markdown
 /// a|b
 /// ```
-fn raw(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
+fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
     // To do: configurable.
     let limit = usize::MAX;
 
     match code {
         Code::Char('(') => {
-            if balance >= limit {
+            if info.balance >= limit {
                 (State::Nok, None)
             } else {
                 tokenizer.consume(code);
-                (
-                    State::Fn(Box::new(move |t, c| raw(t, c, balance + 1))),
-                    None,
-                )
+                info.balance += 1;
+                (State::Fn(Box::new(move |t, c| raw(t, c, info))), None)
             }
         }
         Code::Char(')') => {
-            if balance == 0 {
+            if info.balance == 0 {
                 tokenizer.exit(TokenType::ChunkString);
-                tokenizer.exit(TokenType::DefinitionDestinationString);
-                tokenizer.exit(TokenType::DefinitionDestinationRaw);
-                tokenizer.exit(TokenType::DefinitionDestination);
+                tokenizer.exit(info.options.string.clone());
+                tokenizer.exit(info.options.raw.clone());
+                tokenizer.exit(info.options.destination);
                 (State::Ok, Some(vec![code]))
             } else {
                 tokenizer.consume(code);
-                (
-                    State::Fn(Box::new(move |t, c| raw(t, c, balance - 1))),
-                    None,
-                )
+                info.balance -= 1;
+                (State::Fn(Box::new(move |t, c| raw(t, c, info))), None)
             }
         }
         Code::None
         | Code::CarriageReturnLineFeed
         | Code::VirtualSpace
        | Code::Char('\t' | '\r' | '\n' | ' ') => {
-            if balance > 0 {
+            if info.balance > 0 {
                 (State::Nok, None)
             } else {
                 tokenizer.exit(TokenType::ChunkString);
-                tokenizer.exit(TokenType::DefinitionDestinationString);
-                tokenizer.exit(TokenType::DefinitionDestinationRaw);
-                tokenizer.exit(TokenType::DefinitionDestination);
+                tokenizer.exit(info.options.string.clone());
+                tokenizer.exit(info.options.raw.clone());
+                tokenizer.exit(info.options.destination);
                 (State::Ok, Some(vec![code]))
             }
         }
@@ -189,13 +220,13 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
         Code::Char('\\') => {
             tokenizer.consume(code);
             (
-                State::Fn(Box::new(move |t, c| raw_escape(t, c, balance))),
+                State::Fn(Box::new(move |t, c| raw_escape(t, c, info))),
                 None,
             )
         }
         Code::Char(_) => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(move |t, c| raw(t, c, balance))), None)
+            (State::Fn(Box::new(move |t, c| raw(t, c, info))), None)
         }
     }
 }
@@ -205,15 +236,13 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
 /// ```markdown
 /// a\|)b
 /// ```
-fn raw_escape(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult {
+fn raw_escape(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
     match code {
         Code::Char('(' | ')' | '\\') => {
             tokenizer.consume(code);
-            (
-                State::Fn(Box::new(move |t, c| raw(t, c, balance + 1))),
-                None,
-            )
+            info.balance += 1;
+            (State::Fn(Box::new(move |t, c| raw(t, c, info))), None)
         }
-        _ => raw(tokenizer, code, balance),
+        _ => raw(tokenizer, code, info),
     }
 }
 
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 194165c..c831eaf 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -60,6 +60,19 @@ use crate::construct::partial_space_or_tab::space_or_tab_opt;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 use crate::util::link::link;
 
+/// Configuration.
+///
+/// You must pass the token types in that are used.
+#[derive(Debug)]
+pub struct Options {
+    /// Token for the whole label.
+    pub label: TokenType,
+    /// Token for the markers.
+    pub marker: TokenType,
+    /// Token for the string (inside the markers).
+    pub string: TokenType,
+}
+
 /// State needed to parse labels.
 #[derive(Debug)]
 struct Info {
@@ -69,6 +82,8 @@ struct Info {
     data: bool,
     /// Number of characters in the label.
     size: usize,
+    /// Configuration.
+    options: Options,
 }
 
 /// Before a label.
@@ -76,19 +91,20 @@ struct Info {
 /// ```markdown
 /// |[a]
 /// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFnResult {
     match code {
         Code::Char('[') => {
-            tokenizer.enter(TokenType::DefinitionLabel);
-            tokenizer.enter(TokenType::DefinitionLabelMarker);
-            tokenizer.consume(code);
-            tokenizer.exit(TokenType::DefinitionLabelMarker);
-            tokenizer.enter(TokenType::DefinitionLabelData);
             let info = Info {
                 connect: false,
                 data: false,
                 size: 0,
+                options,
             };
+            tokenizer.enter(info.options.label.clone());
+            tokenizer.enter(info.options.marker.clone());
+            tokenizer.consume(code);
+            tokenizer.exit(info.options.marker.clone());
+            tokenizer.enter(info.options.string.clone());
             (State::Fn(Box::new(|t, c| at_break(t, c, info))), None)
         }
         _ => (State::Nok, None),
@@ -101,17 +117,17 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// [|a]
 /// [a|]
 /// ```
-fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
     match code {
         Code::None | Code::Char('[') => (State::Nok, None),
         Code::Char(']') if !info.data => (State::Nok, None),
         _ if info.size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None),
         Code::Char(']') => {
-            tokenizer.exit(TokenType::DefinitionLabelData);
-            tokenizer.enter(TokenType::DefinitionLabelMarker);
+            tokenizer.exit(info.options.string.clone());
+            tokenizer.enter(info.options.marker.clone());
             tokenizer.consume(code);
-            tokenizer.exit(TokenType::DefinitionLabelMarker);
-            tokenizer.exit(TokenType::DefinitionLabel);
+            tokenizer.exit(info.options.marker.clone());
+            tokenizer.exit(info.options.label);
             (State::Ok, None)
         }
         _ => {
@@ -120,6 +136,8 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult
             if info.connect {
                 let index = tokenizer.events.len() - 1;
                 link(&mut tokenizer.events, index);
+            } else {
+                info.connect = true;
             }
 
             label(tokenizer, code, info)
@@ -157,10 +175,6 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResul
 /// [a|b]
 /// ```
 fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
-    if !info.connect {
-        info.connect = true;
-    }
-
     match code {
         Code::None | Code::Char('[' | ']') => {
             tokenizer.exit(TokenType::ChunkString);
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index aa1e067..d02ce60 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -31,12 +31,23 @@
 //!
 //! <!-- To do: link label end. -->
 
-// To do: pass token types in.
-
 use crate::construct::partial_space_or_tab::space_or_tab_opt;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 use crate::util::link::link;
 
+/// Configuration.
+///
+/// You must pass the token types in that are used.
+#[derive(Debug)]
+pub struct Options {
+    /// Token for the whole title.
+    pub title: TokenType,
+    /// Token for the marker.
+    pub marker: TokenType,
+    /// Token for the string inside the quotes.
+    pub string: TokenType,
+}
+
 /// Type of title.
 #[derive(Debug, PartialEq)]
 enum Kind {
@@ -94,6 +105,17 @@ impl Kind {
     }
 }
 
+/// State needed to parse titles.
+#[derive(Debug)]
+struct Info {
+    /// Whether we’ve seen our first `ChunkString`.
+    connect: bool,
+    /// Kind of title.
+    kind: Kind,
+    /// Configuration.
+    options: Options,
+}
+
 /// Before a title.
 ///
 /// ```markdown
@@ -101,15 +123,19 @@ impl Kind {
 /// |'a'
 /// |(a)
 /// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFnResult {
     match code {
         Code::Char(char) if char == '(' || char == '"' || char == '\'' => {
-            let kind = Kind::from_char(char);
-            tokenizer.enter(TokenType::DefinitionTitle);
-            tokenizer.enter(TokenType::DefinitionTitleMarker);
+            let info = Info {
+                connect: false,
+                kind: Kind::from_char(char),
+                options,
+            };
+            tokenizer.enter(info.options.title.clone());
+            tokenizer.enter(info.options.marker.clone());
             tokenizer.consume(code);
-            tokenizer.exit(TokenType::DefinitionTitleMarker);
-            (State::Fn(Box::new(|t, c| begin(t, c, kind))), None)
+            tokenizer.exit(info.options.marker.clone());
+            (State::Fn(Box::new(|t, c| begin(t, c, info))), None)
         }
         _ => (State::Nok, None),
     }
 }
@@ -124,18 +150,18 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// '|a'
 /// (|a)
 /// ```
-fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
+fn begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
-        Code::Char(char) if char == kind.as_char() => {
-            tokenizer.enter(TokenType::DefinitionTitleMarker);
+        Code::Char(char) if char == info.kind.as_char() => {
+            tokenizer.enter(info.options.marker.clone());
             tokenizer.consume(code);
-            tokenizer.exit(TokenType::DefinitionTitleMarker);
-            tokenizer.exit(TokenType::DefinitionTitle);
+            tokenizer.exit(info.options.marker.clone());
+            tokenizer.exit(info.options.title);
             (State::Ok, None)
         }
         _ => {
-            tokenizer.enter(TokenType::DefinitionTitleString);
-            at_break(tokenizer, code, kind, false)
+            tokenizer.enter(info.options.string.clone());
+            at_break(tokenizer, code, info)
         }
     }
 }
@@ -148,20 +174,24 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
 /// (a|
 /// b)
 /// ```
-fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind, connect: bool) -> StateFnResult {
+fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
     match code {
-        Code::Char(char) if char == kind.as_char() => {
-            tokenizer.exit(TokenType::DefinitionTitleString);
-            begin(tokenizer, code, kind)
+        Code::Char(char) if char == info.kind.as_char() => {
+            tokenizer.exit(info.options.string.clone());
+            begin(tokenizer, code, info)
         }
         Code::None => (State::Nok, None),
         _ => {
             tokenizer.enter(TokenType::ChunkString);
-            if connect {
+
+            if info.connect {
                 let index = tokenizer.events.len() - 1;
                 link(&mut tokenizer.events, index);
+            } else {
+                info.connect = true;
             }
-            title(tokenizer, code, kind)
+
+            title(tokenizer, code, info)
         }
     }
 }
@@ -172,8 +202,8 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind, connect: bool) ->
 /// "a
 /// |b"
 /// ```
-fn line_start(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, kind))(tokenizer, code)
+fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, info))(tokenizer, code)
 }
 
 /// After a line ending, after optional whitespace.
@@ -182,11 +212,11 @@ fn line_start(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResul
 /// "a
 /// |b"
 /// ```
-fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
+fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
         // Blank line not allowed.
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
-        _ => at_break(tokenizer, code, kind, true),
+        _ => at_break(tokenizer, code, info),
     }
 }
 
@@ -195,28 +225,28 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResul
 /// ```markdown
 /// "a|b"
 /// ```
-fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
+fn title(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
-        Code::Char(char) if char == kind.as_char() => {
+        Code::Char(char) if char == info.kind.as_char() => {
             tokenizer.exit(TokenType::ChunkString);
-            at_break(tokenizer, code, kind, true)
+            at_break(tokenizer, code, info)
         }
         Code::None => {
             tokenizer.exit(TokenType::ChunkString);
-            at_break(tokenizer, code, kind, true)
+            at_break(tokenizer, code, info)
         }
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
             tokenizer.consume(code);
             tokenizer.exit(TokenType::ChunkString);
-            (State::Fn(Box::new(|t, c| line_start(t, c, kind))), None)
+            (State::Fn(Box::new(|t, c| line_start(t, c, info))), None)
         }
         Code::Char('\\') => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(|t, c| escape(t, c, kind))), None)
+            (State::Fn(Box::new(|t, c| escape(t, c, info))), None)
         }
         _ => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(|t, c| title(t, c, kind))), None)
+            (State::Fn(Box::new(|t, c| title(t, c, info))), None)
         }
     }
 }
@@ -226,12 +256,12 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
 /// ```markdown
 /// "a\|"b"
 /// ```
-fn escape(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
+fn escape(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
-        Code::Char(char) if char == kind.as_char() => {
+        Code::Char(char) if char == info.kind.as_char() => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(|t, c| title(t, c, kind))), None)
+            (State::Fn(Box::new(|t, c| title(t, c, info))), None)
         }
-        _ => title(tokenizer, code, kind),
+        _ => title(tokenizer, code, info),
     }
 }