diff options
Diffstat (limited to 'src/construct')
-rw-r--r-- | src/construct/block_quote.rs | 5 | ||||
-rw-r--r-- | src/construct/list.rs | 358 |
2 files changed, 200 insertions, 163 deletions
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 483516c..8e8c8b0 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -114,8 +114,3 @@ fn cont_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } } } - -/// End of a block quote. -pub fn end() -> Vec<Token> { - vec![Token::BlockQuote] -} diff --git a/src/construct/list.rs b/src/construct/list.rs index 2fe46b0..2655c20 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -1,4 +1,48 @@ -//! To do. +//! List is a construct that occurs in the [document][] content type. +//! +//! It forms with, roughly, the following BNF: +//! +//! ```bnf +//! ; Restriction: there must be `eol | space_or_tab` after the start. +//! ; Restriction: if the first line after the marker is not blank and starts with `5( space_or_tab )`, +//! ; only the first `space_or_tab` is part of the start. +//! list_item_start ::= '*' | '+' | '-' | 1*9( ascii_decimal ) ( '.' | ')' ) [ 1*4 space_or_tab ] +//! ; Restriction: blank line allowed, except when this is the first continuation after a blank start. +//! ; Restriction: if not blank, the line must be indented, exactly `n` times. +//! list_item_cont ::= [ n( space_or_tab ) ] +//! ``` +//! +//! Further lines that are not prefixed with `list_item_cont` cause the item +//! to be exited, except when those lines are lazy continuation. +//! Like so many things in markdown, list (items) too, are very complex. +//! See [*§ Phase 1: block structure*][commonmark-block] for more on parsing +//! details. +//! +//! Lists relates to the `<li>`, `<ol>`, and `<ul>` elements in HTML. +//! See [*§ 4.4.8 The `li` element*][html-li], +//! [*§ 4.4.5 The `ol` element*][html-ol], and +//! [*§ 4.4.7 The `ul` element*][html-ul] in the HTML spec for more info. +//! +//! ## Tokens +//! +//! * [`ListItem`][Token::ListItem] +//! * [`ListItemMarker`][Token::ListItemMarker] +//! * [`ListItemPrefix`][Token::ListItemPrefix] +//! * [`ListItemValue`][Token::ListItemValue] +//! * [`ListOrdered`][Token::ListOrdered] +//! * [`ListUnordered`][Token::ListUnordered] +//! +//! ## References +//! +//! * [`list.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/list.js) +//! * [*§ 5.2 List items* in `CommonMark`](https://spec.commonmark.org/0.30/#list-items) +//! * [*§ 5.3 Lists* in `CommonMark`](https://spec.commonmark.org/0.30/#lists) +//! +//! [document]: crate::content::document +//! [html-li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element +//! [html-ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element +//! [html-ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element +//! [commonmark-block]: https://spec.commonmark.org/0.30/#phase-1-block-structure use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; use crate::construct::{ @@ -59,16 +103,6 @@ enum Kind { } impl Kind { - // /// Turn the kind into a [char]. - // fn as_char(&self) -> char { - // match self { - // Kind::Dot => '.', - // Kind::Paren => ')', - // Kind::Asterisk => '*', - // Kind::Plus => '+', - // Kind::Dash => '-', - // } - // } /// Turn a [char] into a kind. /// /// ## Panics @@ -97,14 +131,24 @@ impl Kind { } } -/// To do. +/// Start of list item. +/// +/// ```markdown +/// > | * a +/// ^ +/// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::ListItem); // To do: allow arbitrary when code (indented) is turned off. tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) } -/// To do. +/// Start of list item, after whitespace. +/// +/// ```markdown +/// > | * a +/// ^ +/// ``` fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { // Unordered. @@ -121,13 +165,25 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { } } -/// To do. +/// Start of an unordered list item. +/// +/// The line is not a thematic break. +/// +/// ```markdown +/// > | * a +/// ^ +/// ``` fn before_unordered(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(Token::ListItemPrefix); marker(tokenizer, code) } -/// To do. +/// In an ordered list item value. +/// +/// ```markdown +/// > | 1. a +/// ^ +/// ``` fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResult { size += 1; match code { @@ -135,7 +191,6 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResu tokenizer.consume(code); (State::Fn(Box::new(move |t, c| inside(t, c, size))), None) } - // To do: `(!self.interrupt || size < 2)` Code::Char('.' | ')') => { tokenizer.exit(Token::ListItemValue); marker(tokenizer, code) @@ -144,7 +199,14 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResu } } -/// To do. +/// At a list item marker. +/// +/// ```markdown +/// > | * a +/// ^ +/// > | 1. b +/// ^ +/// ``` fn marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let kind = Kind::from_code(code); println!("list item kind: {:?}", kind); @@ -154,172 +216,179 @@ fn marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { (State::Fn(Box::new(marker_after)), None) } -/// To do. +/// After a list item marker. +/// +/// ```markdown +/// > | * a +/// ^ +/// > | 1. b +/// ^ +/// ``` fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - let interrupt = tokenizer.interrupt; - tokenizer.check(blank_line, move |ok| { - let func = if ok { - if interrupt { - nok - } else { - on_blank - } + if ok { + Box::new(|t, c| after(t, c, true)) } else { - marker_after_after - }; - Box::new(func) + Box::new(marker_after_not_blank) + } })(tokenizer, code) } -/// To do. -fn on_blank(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - if let Some(container) = tokenizer.container.as_mut() { - container.blank_initial = true; - } - - // self.containerState.initialBlankLine = true - prefix_end(tokenizer, code, true) -} - -/// To do. -fn marker_after_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - let interrupt = tokenizer.interrupt; - tokenizer.attempt(list_item_prefix_whitespace, move |ok| { - println!("marker:after:after: {:?} {:?}", ok, interrupt); +/// After a list item marker, not followed by a blank line. +/// +/// ```markdown +/// > | * a +/// ^ +/// ``` +fn marker_after_not_blank(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace. + tokenizer.attempt(whitespace, move |ok| { if ok { - Box::new(|t, c| prefix_end(t, c, false)) + Box::new(|t, c| after(t, c, false)) } else { Box::new(prefix_other) } })(tokenizer, code) } -// To do: `on_blank`. +/// In whitespace after a marker. +/// +/// ```markdown +/// > | * a +/// ^ +/// ``` +fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.go(space_or_tab_min_max(1, TAB_SIZE), whitespace_after)(tokenizer, code) +} + +/// After acceptable whitespace. +/// +/// ```markdown +/// > | * a +/// ^ +/// ``` +fn whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) { + (State::Nok, None) + } else { + (State::Ok, Some(vec![code])) + } +} -/// To do. +/// After a list item marker, followed by no indent or more indent that needed. +/// +/// ```markdown +/// > | * a +/// ^ +/// ``` fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.enter(Token::SpaceOrTab); tokenizer.consume(code); tokenizer.exit(Token::SpaceOrTab); - (State::Fn(Box::new(|t, c| prefix_end(t, c, false))), None) + (State::Fn(Box::new(|t, c| after(t, c, false))), None) } _ => (State::Nok, None), } } -/// To do. -fn prefix_end(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> StateFnResult { - let start = skip::to_back( - &tokenizer.events, - tokenizer.events.len() - 1, - &[Token::ListItem], - ); - let prefix = tokenizer.index - tokenizer.events[start].index + (if blank { 1 } else { 0 }); +/// After a list item prefix. +/// +/// ```markdown +/// > | * a +/// ^ +/// ``` +fn after(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> StateFnResult { + if blank && tokenizer.interrupt { + (State::Nok, None) + } else { + let start = skip::to_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Token::ListItem], + ); + let prefix = tokenizer.index - tokenizer.events[start].index + (if blank { 1 } else { 0 }); - if let Some(container) = tokenizer.container.as_mut() { + let container = tokenizer.container.as_mut().unwrap(); + container.blank_initial = blank; container.size = prefix; - } - tokenizer.exit(Token::ListItemPrefix); - tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve)); - (State::Ok, Some(vec![code])) -} - -/// To do. -fn list_item_prefix_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // To do: check how big this should be? - tokenizer.go( - space_or_tab_min_max(1, TAB_SIZE), - list_item_prefix_whitespace_after, - )(tokenizer, code) -} - -fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) { - (State::Nok, None) - } else { + tokenizer.exit(Token::ListItemPrefix); + tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve)); (State::Ok, Some(vec![code])) } } -/// To do. -fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult { - (State::Nok, None) -} - -/// To do. +/// Start of list item continuation. +/// +/// ```markdown +/// | * a +/// > | b +/// ^ +/// ``` pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.check(blank_line, |ok| { - println!("cont:check:blank:after: {:?}", ok); Box::new(if ok { blank_cont } else { not_blank_cont }) })(tokenizer, code) } +/// Start of blank list item continuation. +/// +/// ```markdown +/// | * a +/// > | +/// ^ +/// | b +/// ``` pub fn blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - let mut size = 0; - if let Some(container) = tokenizer.container.as_ref() { - size = container.size; + let container = tokenizer.container.as_ref().unwrap(); + let size = container.size; - if container.blank_initial { - return (State::Nok, None); - } + if container.blank_initial { + (State::Nok, None) + } else { + // Consume, optionally, at most `size`. + tokenizer.go(space_or_tab_min_max(0, size), ok)(tokenizer, code) } - - // We have a blank line. - // Still, try to consume at most the items size. - tokenizer.go(space_or_tab_min_max(0, size), cont_after)(tokenizer, code) } +/// Start of non-blank list item continuation. +/// +/// ```markdown +/// | * a +/// > | b +/// ^ +/// ``` pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - let mut size = 0; + let container = tokenizer.container.as_mut().unwrap(); + let size = container.size; - if let Some(container) = tokenizer.container.as_mut() { - container.blank_initial = false; - size = container.size; - } + container.blank_initial = false; - tokenizer.go(space_or_tab_min_max(size, size), cont_after)(tokenizer, code) + // Consume exactly `size`. + tokenizer.go(space_or_tab_min_max(size, size), ok)(tokenizer, code) } -pub fn cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - println!("cont: blank: after"); +/// A state fn to yield [`State::Ok`]. +pub fn ok(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { (State::Ok, Some(vec![code])) } -/// To do. -pub fn end() -> Vec<Token> { - vec![Token::ListItem] +/// A state fn to yield [`State::Nok`]. +fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult { + (State::Nok, None) } -/// To do. +/// Find adjacent list items with the same marker. pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { let mut edit_map = EditMap::new(); - - let mut index = 0; - println!("list item:before: {:?}", tokenizer.events.len()); - while index < tokenizer.events.len() { - let event = &tokenizer.events[index]; - println!( - "ev: {:?} {:?} {:?} {:?} {:?} {:?}", - index, - event.event_type, - event.token_type, - event.content_type, - event.previous, - event.next - ); - index += 1; - } - let mut index = 0; let mut balance = 0; let mut lists_wip: Vec<(Kind, usize, usize, usize)> = vec![]; let mut lists: Vec<(Kind, usize, usize, usize)> = vec![]; - // To do: track balance? Or, check what’s between them? + // Merge list items. while index < tokenizer.events.len() { let event = &tokenizer.events[index]; @@ -353,7 +422,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { ], ) == current.2 { - println!("prev:match {:?} {:?}", previous, current); let previous_mut = &mut lists_wip[list_index]; previous_mut.3 = current.3; let mut remainder = lists_wip.drain((list_index + 1)..).collect::<Vec<_>>(); @@ -362,21 +430,16 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { break; } - println!( - "todo: move them over to `lists` at some point? {:?}", - previous - ); + // To do: move items that could never match anymore over to `lists`, + // This currently keeps on growing and growing! } if !matched { - println!("prev:!match {:?} {:?}", lists_wip, current); lists_wip.push(current); } - println!("enter: {:?}", event.token_type); balance += 1; } else { - println!("exit: {:?}", event.token_type); balance -= 1; } } @@ -386,19 +449,18 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { lists.append(&mut lists_wip); + // Inject events. let mut index = 0; while index < lists.len() { let list_item = &lists[index]; let mut list_start = tokenizer.events[list_item.2].clone(); - let token_type = if matches!(list_item.0, Kind::Paren | Kind::Dot) { - Token::ListOrdered - } else { - Token::ListUnordered + let mut list_end = tokenizer.events[list_item.3].clone(); + let token_type = match list_item.0 { + Kind::Paren | Kind::Dot => Token::ListOrdered, + _ => Token::ListUnordered, }; list_start.token_type = token_type.clone(); - let mut list_end = tokenizer.events[list_item.3].clone(); list_end.token_type = token_type; - println!("inject:list: {:?} {:?}", list_start, list_end); edit_map.add(list_item.2, 0, vec![list_start]); edit_map.add(list_item.3 + 1, 0, vec![list_end]); @@ -406,25 +468,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { index += 1; } - println!("list items: {:#?}", lists); - - let events = edit_map.consume(&mut tokenizer.events); - - let mut index = 0; - println!("list item:after: {:?}", events.len()); - while index < events.len() { - let event = &events[index]; - println!( - "ev: {:?} {:?} {:?} {:?} {:?} {:?}", - index, - event.event_type, - event.token_type, - event.content_type, - event.previous, - event.next - ); - index += 1; - } - - events + edit_map.consume(&mut tokenizer.events) } |