diff options
Diffstat (limited to '')
| -rw-r--r-- | readme.md | 4 | ||||
| -rw-r--r-- | src/construct/heading_setext.rs | 27 | ||||
| -rw-r--r-- | src/construct/paragraph.rs | 6 | ||||
| -rw-r--r-- | src/construct/partial_destination.rs | 6 | ||||
| -rw-r--r-- | src/construct/partial_label.rs | 84 | ||||
| -rw-r--r-- | src/construct/partial_title.rs | 33 | ||||
| -rw-r--r-- | src/content/string.rs | 32 | ||||
| -rw-r--r-- | src/util/link.rs | 8 | ||||
| -rw-r--r-- | src/util/mod.rs | 1 | ||||
| -rw-r--r-- | tests/character_reference.rs | 2 | 
10 files changed, 136 insertions, 67 deletions
| @@ -66,8 +66,6 @@ cargo doc --document-private-items  ### Small things -- [ ] (1) Connect `ChunkString` in label, destination, title -- [ ] (1) Add support for line endings in `string`  - [ ] (1) Add docs to subtokenize  - [ ] (1) Add module docs to parser  - [ ] (1) Add overview docs on how everything works @@ -171,6 +169,8 @@ cargo doc --document-private-items  - [x] (1) Remove `content` content type, as it is no longer needed  - [x] (1) Paragraph  - [x] (1) Parse whitespace in each flow construct +- [x] (1) Connect `ChunkString` in label, destination, title +- [x] (1) Add support for line endings in `string`  ### Extensions diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 64647cb..579fa71 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -52,7 +52,7 @@  use crate::constant::TAB_SIZE;  use crate::construct::partial_space_or_tab::space_or_tab_opt;  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::span::from_exit_event; +use crate::util::{link::link, span::from_exit_event};  /// Kind of underline.  #[derive(Debug, Clone, PartialEq)] @@ -133,16 +133,12 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            let next = tokenizer.events.len(); -            let previous = next - 2; -              tokenizer.enter(TokenType::LineEnding); +            let index = tokenizer.events.len() - 1; +            link(&mut tokenizer.events, index);              tokenizer.consume(code);              tokenizer.exit(TokenType::LineEnding); -            tokenizer.events[previous].next = Some(next); -            tokenizer.events[next].previous = Some(previous); -              (                  State::Fn(Box::new(tokenizer.go(space_or_tab_opt(), text_line_start))),                  None, @@ -160,27 +156,20 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ==  /// ```  fn text_line_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    let next = tokenizer.events.len() - 2; -    let previous = next - 2; +    let index = tokenizer.events.len() - 2;      // Link the whitespace, if it exists. -    if tokenizer.events[next].token_type == TokenType::Whitespace { -        tokenizer.events[previous].next = Some(next); -        tokenizer.events[next].previous = Some(previous); +    if tokenizer.events[index].token_type == TokenType::Whitespace { +        link(&mut tokenizer.events, index);      }      match code {          // Blank lines not allowed.          Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),          _ => { -            let next = tokenizer.events.len(); -            let previous = next - 2; -              tokenizer.enter(TokenType::ChunkText); - -            tokenizer.events[previous].next = Some(next); -            tokenizer.events[next].previous = Some(previous); - +            let index = tokenizer.events.len() - 1; +            link(&mut tokenizer.events, index);              text_inside(tokenizer, code)          }      } diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index fa18f28..b00188d 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -35,6 +35,7 @@ use crate::construct::{      partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break,  };  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +use crate::util::link::link;  /// Before a paragraph.  /// @@ -83,9 +84,8 @@ fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.consume(code);      tokenizer.exit(TokenType::ChunkText);      tokenizer.enter(TokenType::ChunkText); -    let next_index = tokenizer.events.len() - 1; -    tokenizer.events[next_index - 2].next = Some(next_index); -    tokenizer.events[next_index].previous = Some(next_index - 2); +    let index = tokenizer.events.len() - 1; +    link(&mut tokenizer.events, index);      (State::Fn(Box::new(inside)), None)  } diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index bc95055..901a10d 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -18,8 +18,8 @@  //! They are counted with a counter that starts at `0`, and is incremented  //! every time `(` occurs and decremented every time `)` occurs.  //! If `)` is found when the counter is `0`, the destination closes immediately -//! after it. -//! Escaped parens do not count. +//! before it. +//! Escaped parens do not count in balancing.  //!  //! It is recommended to use the enclosed variant of destinations, as it allows  //! arbitrary parens, and also allows for whitespace and other characters in @@ -68,7 +68,6 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              tokenizer.enter(TokenType::DefinitionDestination);              tokenizer.enter(TokenType::DefinitionDestinationRaw);              tokenizer.enter(TokenType::DefinitionDestinationString); -            // To do: link.              tokenizer.enter(TokenType::ChunkString);              raw(tokenizer, code, 0)          } @@ -90,7 +89,6 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {          (State::Ok, None)      } else {          tokenizer.enter(TokenType::DefinitionDestinationString); -        // To do: link.          tokenizer.enter(TokenType::ChunkString);          enclosed(tokenizer, code)      } diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 4997390..55efd13 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -56,7 +56,9 @@  // To do: pass token types in.  use crate::constant::LINK_REFERENCE_SIZE_MAX; +use crate::construct::partial_space_or_tab::space_or_tab_opt;  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +use crate::util::link::link;  /// Before a label.  /// @@ -71,7 +73,10 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              tokenizer.consume(code);              tokenizer.exit(TokenType::DefinitionLabelMarker);              tokenizer.enter(TokenType::DefinitionLabelData); -            (State::Fn(Box::new(|t, c| at_break(t, c, false, 0))), None) +            ( +                State::Fn(Box::new(|t, c| at_break(t, c, false, 0, false))), +                None, +            )          }          _ => (State::Nok, None),      } @@ -83,7 +88,13 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// [|a]  /// [a|]  /// ``` -fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult { +fn at_break( +    tokenizer: &mut Tokenizer, +    code: Code, +    data: bool, +    size: usize, +    connect: bool, +) -> StateFnResult {      match code {          Code::None | Code::Char('[') => (State::Nok, None),          Code::Char(']') if !data => (State::Nok, None), @@ -96,24 +107,57 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> S              tokenizer.exit(TokenType::DefinitionLabel);              (State::Ok, None)          } -        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { -            tokenizer.enter(TokenType::LineEnding); -            tokenizer.consume(code); -            tokenizer.exit(TokenType::LineEnding); -            // To do: limit blank lines. -            ( -                State::Fn(Box::new(move |t, c| at_break(t, c, data, size))), -                None, -            ) -        }          _ => {              tokenizer.enter(TokenType::ChunkString); -            // To do: link. + +            if connect { +                let index = tokenizer.events.len() - 1; +                link(&mut tokenizer.events, index); +            } +              label(tokenizer, code, data, size)          }      }  } +/// After a line ending. +/// +/// ```markdown +/// [a +/// |b] +/// ``` +fn line_start( +    tokenizer: &mut Tokenizer, +    code: Code, +    data: bool, +    size: usize, +    connect: bool, +) -> StateFnResult { +    tokenizer.go(space_or_tab_opt(), move |t, c| { +        line_begin(t, c, data, size, connect) +    })(tokenizer, code) +} + +/// After a line ending, after optional whitespace. +/// +/// ```markdown +/// [a +/// |b] +/// ``` +fn line_begin( +    tokenizer: &mut Tokenizer, +    code: Code, +    data: bool, +    size: usize, +    connect: bool, +) -> StateFnResult { +    match code { +        // Blank line not allowed. +        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None), +        _ => at_break(tokenizer, code, data, size, connect), +    } +} +  /// In a label, in text.  ///  /// ```markdown @@ -121,13 +165,21 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> S  /// ```  fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {      match code { -        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '[' | ']') => { +        Code::None | Code::Char('[' | ']') => {              tokenizer.exit(TokenType::ChunkString); -            at_break(tokenizer, code, data, size) +            at_break(tokenizer, code, data, size, true)          }          _ if size > LINK_REFERENCE_SIZE_MAX => {              tokenizer.exit(TokenType::ChunkString); -            at_break(tokenizer, code, data, size) +            at_break(tokenizer, code, data, size, true) +        } +        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { +            tokenizer.consume(code); +            tokenizer.exit(TokenType::ChunkString); +            ( +                State::Fn(Box::new(move |t, c| line_start(t, c, data, size + 1, true))), +                None, +            )          }          Code::VirtualSpace | Code::Char('\t' | ' ') => {              tokenizer.consume(code); diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 0669c8e..322a3e6 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -35,6 +35,7 @@  use crate::construct::partial_space_or_tab::space_or_tab_opt;  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +use crate::util::link::link;  /// Type of title.  #[derive(Debug, Clone, PartialEq)] @@ -102,7 +103,7 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {          }          _ => {              tokenizer.enter(TokenType::DefinitionTitleString); -            at_break(tokenizer, code, kind) +            at_break(tokenizer, code, kind, false)          }      }  } @@ -115,22 +116,19 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {  /// (a|  /// b)  /// ``` -fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { +fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind, connect: bool) -> StateFnResult {      match code {          Code::Char(char) if char == kind_to_marker(&kind) => {              tokenizer.exit(TokenType::DefinitionTitleString);              begin(tokenizer, code, kind)          }          Code::None => (State::Nok, None), -        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { -            tokenizer.enter(TokenType::LineEnding); -            tokenizer.consume(code); -            tokenizer.exit(TokenType::LineEnding); -            (State::Fn(Box::new(|t, c| line_start(t, c, kind))), None) -        }          _ => { -            // To do: link.              tokenizer.enter(TokenType::ChunkString); +            if connect { +                let index = tokenizer.events.len() - 1; +                link(&mut tokenizer.events, index); +            }              title(tokenizer, code, kind)          }      } @@ -156,7 +154,7 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResul      match code {          // Blank line not allowed.          Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None), -        _ => at_break(tokenizer, code, kind), +        _ => at_break(tokenizer, code, kind, true),      }  } @@ -169,11 +167,20 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {      match code {          Code::Char(char) if char == kind_to_marker(&kind) => {              tokenizer.exit(TokenType::ChunkString); -            at_break(tokenizer, code, kind) +            at_break(tokenizer, code, kind, true)          } -        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { +        Code::None => { +            tokenizer.exit(TokenType::ChunkString); +            at_break(tokenizer, code, kind, true) +        } +        // To do: limit blank lines. +        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { +            tokenizer.consume(code);              tokenizer.exit(TokenType::ChunkString); -            at_break(tokenizer, code, kind) +            ( +                State::Fn(Box::new(move |t, c| line_start(t, c, kind))), +                None, +            )          }          Code::Char('\\') => {              tokenizer.consume(code); diff --git a/src/content/string.rs b/src/content/string.rs index f591cd7..efb6e60 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -43,13 +43,28 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// |qwe  /// ```  fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    if let Code::None = code { -        (State::Ok, None) -    } else { -        tokenizer.enter(TokenType::Data); -        tokenizer.consume(code); -        (State::Fn(Box::new(in_data)), None) +    match code { +        Code::None => (State::Ok, None), +        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { +            tokenizer.enter(TokenType::LineEnding); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::LineEnding); +            (State::Fn(Box::new(start)), None) +        } +        _ => { +            tokenizer.enter(TokenType::Data); +            tokenizer.consume(code); +            (State::Fn(Box::new(in_data)), None) +        }      } + +    // if let Code::None = code { +    //     (State::Ok, None) +    // } else { +    //     tokenizer.enter(TokenType::Data); +    //     tokenizer.consume(code); +    //     (State::Fn(Box::new(in_data)), None) +    // }  }  /// In data. @@ -59,10 +74,9 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code { -        // To do: line endings. -        Code::None => { +        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {              tokenizer.exit(TokenType::Data); -            (State::Ok, None) +            before_data(tokenizer, code)          }          // To do: somehow get these markers from constructs.          Code::Char('&' | '\\') => { diff --git a/src/util/link.rs b/src/util/link.rs new file mode 100644 index 0000000..917ce4d --- /dev/null +++ b/src/util/link.rs @@ -0,0 +1,8 @@ +//! To do. + +use crate::tokenizer::Event; + +pub fn link(events: &mut [Event], index: usize) { +    events[index - 2].next = Some(index); +    events[index].previous = Some(index - 2); +} diff --git a/src/util/mod.rs b/src/util/mod.rs index c3db267..5439c62 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -2,5 +2,6 @@  pub mod decode_character_reference;  pub mod encode; +pub mod link;  pub mod sanitize_uri;  pub mod span; diff --git a/tests/character_reference.rs b/tests/character_reference.rs index bcd0aca..f2337ab 100644 --- a/tests/character_reference.rs +++ b/tests/character_reference.rs @@ -100,7 +100,7 @@ fn character_reference() {      //     "should not support character references as construct markers (2)"      // ); -    // To do: link. +    // To do: link (resource).      // assert_eq!(      //     micromark("[a](url "tit")"),      //     "<p>[a](url "tit")</p>", | 
