diff options
Diffstat (limited to '')
 src/content/content.rs | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/content/flow.rs    | 45 ++++++++++++++++---------------
 src/content/mod.rs     |  2 ++
 src/content/string.rs  | 42 +++++-----------------------
 4 files changed, 118 insertions(+), 55 deletions(-)
diff --git a/src/content/content.rs b/src/content/content.rs new file mode 100644 index 0000000..7bf692f --- /dev/null +++ b/src/content/content.rs @@ -0,0 +1,84 @@ +//! The `content`, ahum, content type. +//! +//! **Content** is zero or more definitions, and then zero or one paragraph. +//! It’s a weird one, and needed to make certain edge cases around definitions +//! spec compliant. +//! Definitions are unlike other things in markdown, in that they behave like +//! **text** in that they can contain arbitrary line endings, but *have* to end +//! at a line ending. +//! If they end in something else, the whole definition instead is seen as a +//! paragraph. +//! +//! The constructs found in content are: +//! +//! *   Definition +//! *   Paragraph + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Before content. +/// +/// ```markdown +/// |[x]: y +/// |asd +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { +            unreachable!("expected non-eol/eof"); +        } +        _ => paragraph_initial(tokenizer, code) +        // To do: definition. +        // _ => tokenizer.attempt(definition, |ok| { +        //     Box::new(if ok { +        //         a +        //     } else { +        //         b +        //     }) +        // })(tokenizer, code), +    } +} + +/// Before a paragraph. +/// +/// ```markdown +/// |asd +/// ``` +fn paragraph_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { +            unreachable!("expected non-eol/eof"); +        } +        _ => { +            tokenizer.enter(TokenType::Paragraph); +            tokenizer.enter(TokenType::ChunkText); +            data(tokenizer, code) +        } +    } +} + +/// In a line in a paragraph. 
+/// +/// ```markdown +/// |\& +/// |qwe +/// ``` +fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::None => { +            tokenizer.exit(TokenType::ChunkText); +            tokenizer.exit(TokenType::Paragraph); +            (State::Ok, None) +        } +        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { +            tokenizer.consume(code); +            tokenizer.exit(TokenType::ChunkText); +            tokenizer.enter(TokenType::ChunkText); +            (State::Fn(Box::new(data)), None) +        } +        _ => { +            tokenizer.consume(code); +            (State::Fn(Box::new(data)), None) +        } +    } +} diff --git a/src/content/flow.rs b/src/content/flow.rs index 6f94424..0d1bd22 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -31,8 +31,6 @@ use crate::tokenizer::{Code, Event, Point, State, StateFnResult, TokenType, Toke  use crate::util::get_span;  /// Turn `codes` as the flow content type into events. -// To do: remove this `allow` when all the content types are glued together. 
-#[allow(dead_code)]  pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {      let mut tokenizer = Tokenizer::new(point, index);      tokenizer.feed(codes, Box::new(start), true); @@ -49,7 +47,7 @@ pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {  /// |    bravo  /// |***  /// ``` -fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::None => (State::Ok, None),          _ => tokenizer.attempt(blank_line, |ok| { @@ -168,7 +166,7 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {          _ => {              tokenizer.enter(TokenType::Content);              tokenizer.enter(TokenType::ContentChunk); -            content(tokenizer, code) +            content(tokenizer, code, tokenizer.events.len() - 1)          }      }  } @@ -178,21 +176,26 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// al|pha  /// ```  // To do: lift limitations as documented above. 
-fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {      match code { -        Code::None => { -            tokenizer.exit(TokenType::ContentChunk); -            content_end(tokenizer, code) -        } +        Code::None => content_end(tokenizer, code),          Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            tokenizer.exit(TokenType::ContentChunk); -            tokenizer.check(continuation_construct, |ok| { -                Box::new(if ok { content_continue } else { content_end }) +            tokenizer.check(continuation_construct, move |ok| { +                Box::new(move |t, c| { +                    if ok { +                        content_continue(t, c, previous) +                    } else { +                        content_end(t, c) +                    } +                })              })(tokenizer, code)          }          _ => {              tokenizer.consume(code); -            (State::Fn(Box::new(content)), None) +            ( +                State::Fn(Box::new(move |t, c| content(t, c, previous))), +                None, +            )          }      }  } @@ -254,17 +257,21 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) ->      }  } -fn content_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    // To do: should this be part of the content chunk? -    // That’s what `micromark-js` does. 
-    tokenizer.enter(TokenType::LineEnding); +fn content_continue(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult {      tokenizer.consume(code); -    tokenizer.exit(TokenType::LineEnding); +    tokenizer.exit(TokenType::ContentChunk);      tokenizer.enter(TokenType::ContentChunk); -    (State::Fn(Box::new(content)), None) +    let next_index = tokenizer.events.len() - 1; +    tokenizer.events[previous_index].next = Some(next_index); +    tokenizer.events[next_index].previous = Some(previous_index); +    ( +        State::Fn(Box::new(move |t, c| content(t, c, next_index))), +        None, +    )  }  fn content_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    tokenizer.exit(TokenType::ContentChunk);      tokenizer.exit(TokenType::Content);      after(tokenizer, code)  } diff --git a/src/content/mod.rs b/src/content/mod.rs index d5771a3..4c0a7f4 100644 --- a/src/content/mod.rs +++ b/src/content/mod.rs @@ -1,4 +1,6 @@  //! Content types found in markdown. +#[allow(clippy::module_inception)] +pub mod content;  pub mod flow;  pub mod string; diff --git a/src/content/string.rs b/src/content/string.rs index 64f544b..ff9e3fc 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -5,7 +5,7 @@  //! It exists in things such as identifiers (media references, definitions),  //! titles, URLs, code (fenced) info and meta parts.  //! -//! The constructs found in strin are: +//! The constructs found in string are:  //!  //! *   [Character escape][crate::construct::character_escape]  //! *   [Character reference][crate::construct::character_reference] @@ -13,16 +13,7 @@  use crate::construct::{      character_escape::start as character_escape, character_reference::start as character_reference,  }; -use crate::tokenizer::{Code, Event, Point, State, StateFnResult, TokenType, Tokenizer}; - -/// Turn `codes` as the string content type into events. -// To do: remove this `allow` when all the content types are glued together. 
-#[allow(dead_code)] -pub fn string(codes: &[Code], point: Point, index: usize) -> Vec<Event> { -    let mut tokenizer = Tokenizer::new(point, index); -    tokenizer.feed(codes, Box::new(before), true); -    tokenizer.events -} +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  /// Before string.  /// @@ -33,33 +24,12 @@ pub fn string(codes: &[Code], point: Point, index: usize) -> Vec<Event> {  /// |\&  /// |qwe  /// ``` -fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::None => (State::Ok, None), -        _ => tokenizer.attempt(character_reference, |ok| { -            Box::new(if ok { -                before -            } else { -                before_not_character_reference -            }) -        })(tokenizer, code), -    } -} - -/// Before string, not at a character reference. -/// -/// Assume character escape. -/// -/// ```markdown -/// |\& -/// |qwe -/// ``` -fn before_not_character_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::None => (State::Ok, None), -        _ => tokenizer.attempt(character_escape, |ok| { +        _ => tokenizer.attempt_2(character_reference, character_escape, |ok| {              Box::new(if ok { -                before +                start              } else {                  before_not_character_escape              }) @@ -98,7 +68,7 @@ fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {          // To do: somehow get these markers from constructs.          Code::Char('&' | '\\') => {              tokenizer.exit(TokenType::Data); -            before(tokenizer, code) +            start(tokenizer, code)          }          _ => {              tokenizer.consume(code);  | 
