diff options
Diffstat (limited to '')
| -rw-r--r-- | src/content/flow.rs | 23 | ||||
| -rw-r--r-- | src/content/string.rs | 20 | ||||
| -rw-r--r-- | src/lib.rs | 1 | ||||
| -rw-r--r-- | src/parser.rs | 12 | ||||
| -rw-r--r-- | src/subtokenize.rs | 67 | ||||
| -rw-r--r-- | src/tokenizer.rs | 15 | ||||
| -rw-r--r-- | src/util.rs | 8 | ||||
| -rw-r--r-- | tests/code_fenced.rs | 11 | 
8 files changed, 105 insertions, 52 deletions
| diff --git a/src/content/flow.rs b/src/content/flow.rs index 693ffb5..6f94424 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -26,28 +26,17 @@ use crate::construct::{      html_flow::start as html_flow, partial_whitespace::start as whitespace,      thematic_break::start as thematic_break,  }; -use crate::tokenizer::{Code, Event, State, StateFnResult, TokenType, Tokenizer}; +use crate::subtokenize::subtokenize; +use crate::tokenizer::{Code, Event, Point, State, StateFnResult, TokenType, Tokenizer};  use crate::util::get_span;  /// Turn `codes` as the flow content type into events.  // To do: remove this `allow` when all the content types are glued together.  #[allow(dead_code)] -pub fn flow(codes: &[Code]) -> Vec<Event> { -    let mut tokenizer = Tokenizer::new(); -    let (state, remainder) = tokenizer.feed(codes, Box::new(start), true); - -    if let Some(ref x) = remainder { -        if !x.is_empty() { -            unreachable!("expected no final remainder {:?}", x); -        } -    } - -    match state { -        State::Ok => {} -        _ => unreachable!("expected final state to be `State::Ok`"), -    } - -    tokenizer.events +pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> { +    let mut tokenizer = Tokenizer::new(point, index); +    tokenizer.feed(codes, Box::new(start), true); +    subtokenize(tokenizer.events, codes)  }  /// Before flow. diff --git a/src/content/string.rs b/src/content/string.rs index 1239a36..64f544b 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -13,26 +13,14 @@  use crate::construct::{      character_escape::start as character_escape, character_reference::start as character_reference,  }; -use crate::tokenizer::{Code, Event, State, StateFnResult, TokenType, Tokenizer}; +use crate::tokenizer::{Code, Event, Point, State, StateFnResult, TokenType, Tokenizer};  /// Turn `codes` as the string content type into events.  
// To do: remove this `allow` when all the content types are glued together.  #[allow(dead_code)] -pub fn string(codes: &[Code]) -> Vec<Event> { -    let mut tokenizer = Tokenizer::new(); -    let (state, remainder) = tokenizer.feed(codes, Box::new(before), true); - -    if let Some(ref x) = remainder { -        if !x.is_empty() { -            unreachable!("expected no final remainder {:?}", x); -        } -    } - -    match state { -        State::Ok => {} -        _ => unreachable!("expected final state to be `State::Ok`"), -    } - +pub fn string(codes: &[Code], point: Point, index: usize) -> Vec<Event> { +    let mut tokenizer = Tokenizer::new(point, index); +    tokenizer.feed(codes, Box::new(before), true);      tokenizer.events  } @@ -9,6 +9,7 @@ mod constant;  mod construct;  mod content;  mod parser; +mod subtokenize;  mod tokenizer;  mod util; diff --git a/src/parser.rs b/src/parser.rs index e156e33..5648942 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,13 +2,21 @@  // To do: this should start with `containers`, when they’re done.  // To do: definitions and such will mean more data has to be passed around.  use crate::content::flow::flow; -use crate::tokenizer::{as_codes, Code, Event}; +use crate::tokenizer::{as_codes, Code, Event, Point};  /// Turn a string of markdown into events.  /// Passes the codes back so the compiler can access the source.  pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) {      let codes = as_codes(value);      // To do: pass a reference to this around, and slices in the (back)feeding. Might be tough. 
-    let events = flow(&codes); +    let events = flow( +        &codes, +        Point { +            line: 1, +            column: 1, +            offset: 0, +        }, +        0, +    );      (events, codes)  } diff --git a/src/subtokenize.rs b/src/subtokenize.rs new file mode 100644 index 0000000..c1a8435 --- /dev/null +++ b/src/subtokenize.rs @@ -0,0 +1,67 @@ +use crate::content::string::string; +use crate::tokenizer::{Code, Event, EventType, TokenType}; +use crate::util::{slice_codes, Span}; + +pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> Vec<Event> { +    let mut events = events; +    let mut index = 0; + +    // println!("before"); +    // while index < events.len() { +    //     let event = &events[index]; +    //     println!( +    //         "ev1: {:?} {:?} {:?}", +    //         event.event_type, event.token_type, index +    //     ); +    //     index += 1; +    // } +    // +    // index = 0; +    // +    // println!("change"); + +    while index < events.len() { +        let event = &events[index]; + +        // println!( +        //     "ev2: {:?} {:?} {:?}", +        //     event.event_type, event.token_type, index +        // ); + +        if event.event_type == EventType::Enter && event.token_type == TokenType::ChunkString { +            let exit = &events[index + 1]; + +            assert_eq!( +                exit.event_type, +                EventType::Exit, +                "expected `enter` of `{:?}` to be follow by an `exit` event", +                event.token_type +            ); +            assert_eq!( +                exit.token_type, event.token_type, +                "expected `exit` of `{:?}` to follow its `enter` event", +                event.token_type +            ); + +            let subevents = string( +                slice_codes( +                    codes, +                    &Span { +                        start_index: event.index, +                        end_index: exit.index, +                    }, +  
              ), +                event.point.clone(), +                event.index, +            ); +            let len = subevents.len(); +            // To do: recursion needed? +            events.splice(index..(index + 2), subevents); +            index += len; +        } else { +            index += 1; +        } +    } + +    events +} diff --git a/src/tokenizer.rs b/src/tokenizer.rs index faee8d9..35e768e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -177,16 +177,12 @@ pub struct Tokenizer {  impl Tokenizer {      /// Create a new tokenizer. -    pub fn new() -> Tokenizer { +    pub fn new(point: Point, index: usize) -> Tokenizer {          Tokenizer {              current: Code::None, -            index: 0, +            index,              consumed: true, -            point: Point { -                line: 1, -                column: 1, -                offset: 0, -            }, +            point,              stack: vec![],              events: vec![],          } @@ -499,6 +495,11 @@ impl Tokenizer {              }          } +        match state { +            State::Ok => {} +            _ => unreachable!("expected final state to be `State::Ok`"), +        } +          check_statefn_result((state, None))      }  } diff --git a/src/util.rs b/src/util.rs index 47359a3..5a916cd 100644 --- a/src/util.rs +++ b/src/util.rs @@ -165,12 +165,12 @@ pub fn get_span(events: &[Event], index: usize) -> Span {      assert_eq!(          exit.event_type,          EventType::Exit, -        "expected get_span to be called on `exit` event" +        "expected `get_span` to be called on `exit` event"      ); -    let mut start_index = index - 1; +    let mut enter_index = index - 1;      loop { -        let enter = &events[start_index]; +        let enter = &events[enter_index];          if enter.event_type == EventType::Enter && enter.token_type == token_type {              return Span {                  // start: enter.point.clone(), @@ -181,7 +181,7 @@ pub fn 
get_span(events: &[Event], index: usize) -> Span {              };          } -        start_index -= 1; +        enter_index -= 1;      }  } diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs index 46fa9cb..6419f67 100644 --- a/tests/code_fenced.rs +++ b/tests/code_fenced.rs @@ -219,12 +219,11 @@ fn code_fenced() {          "should support an eof in the prefix, in content"      ); -    // To do: strings. -    // assert_eq!( -    //     micromark("```j\\+s&copy;"), -    //     "<pre><code class=\"language-j+s©\"></code></pre>\n", -    //     "should support character escapes and character references in info strings" -    // ); +    assert_eq!( +        micromark("```j\\+s&copy;"), +        "<pre><code class=\"language-j+s©\"></code></pre>\n", +        "should support character escapes and character references in info strings" +    );      assert_eq!(        micromark("   ```\naaa\n    ```"), | 
