Diffstat
-rw-r--r--  src/construct/definition.rs | 37
-rw-r--r--  src/content/document.rs     | 37
-rw-r--r--  src/tokenizer.rs            | 14

3 files changed, 50 insertions, 38 deletions
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 6f63c79..ee930b1 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -98,7 +98,11 @@ use crate::construct::partial_space_or_tab_eol::space_or_tab_eol;
 use crate::event::Name;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
-use crate::util::skip::opt_back as skip_opt_back;
+use crate::util::{
+    normalize_identifier::normalize_identifier,
+    skip,
+    slice::{Position, Slice},
+};
 
 /// At the start of a definition.
 ///
@@ -110,7 +114,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
     // Do not interrupt paragraphs (but do follow definitions).
     let possible = !tokenizer.interrupt
         || (!tokenizer.events.is_empty()
-            && tokenizer.events[skip_opt_back(
+            && tokenizer.events[skip::opt_back(
                 &tokenizer.events,
                 tokenizer.events.len() - 1,
                 &[Name::LineEnding, Name::SpaceOrTab],
@@ -165,6 +169,12 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_2 = Name::Data;
     tokenizer.tokenize_state.token_3 = Name::Data;
 
+    tokenizer.tokenize_state.end = skip::to_back(
+        &tokenizer.events,
+        tokenizer.events.len() - 1,
+        &[Name::DefinitionLabelString],
+    );
+
     match tokenizer.current {
         Some(b':') => {
             tokenizer.enter(Name::DefinitionMarker);
@@ -239,6 +249,7 @@ pub fn destination_missing(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_4 = Name::Data;
     tokenizer.tokenize_state.token_5 = Name::Data;
     tokenizer.tokenize_state.size_b = 0;
+    tokenizer.tokenize_state.end = 0;
     State::Nok
 }
 
@@ -271,11 +282,31 @@ pub fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None | Some(b'\n') => {
             tokenizer.exit(Name::Definition);
+
+            // Note: we don’t care about uniqueness.
+            // It’s likely that that doesn’t happen very frequently.
+            // It is more likely that it wastes precious time.
+            tokenizer.tokenize_state.definitions.push(
+                // Note: we don’t care about virtual spaces, so `as_str` is fine.
+                normalize_identifier(
+                    Slice::from_position(
+                        tokenizer.parse_state.bytes,
+                        &Position::from_exit_event(&tokenizer.events, tokenizer.tokenize_state.end),
+                    )
+                    .as_str(),
+                ),
+            );
+
+            tokenizer.tokenize_state.end = 0;
+
             // You’d be interrupting.
             tokenizer.interrupt = true;
             State::Ok
         }
-        _ => State::Nok,
+        _ => {
+            tokenizer.tokenize_state.end = 0;
+            State::Nok
+        },
     }
 }
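What the definition.rs hunks above do: `label_after` now remembers, in `tokenize_state.end`, the index of the exit event for the label that was just parsed (`skip::to_back` searches backwards for `DefinitionLabelString`), and once the whole definition turns out to be valid, `after_whitespace` slices the label's bytes out of the input, normalizes them, and pushes the result onto `tokenize_state.definitions`; both the `Ok` and `Nok` paths reset `end` so no state leaks into the next attempt. Note that the old uniqueness check (`!definitions.contains(&id)` in the document.rs diff below) is dropped on purpose: per the in-diff comment, duplicate labels are rare enough that deduplicating costs more than it saves. For readers unfamiliar with label normalization, here is a minimal sketch of the CommonMark rule under assumed behavior; it is not the crate's actual `normalize_identifier`, which may differ in details such as full Unicode case folding:

    // Sketch only: trim the label, collapse internal whitespace runs to a
    // single space, and case-fold, so differently-formatted labels compare
    // equal. `to_lowercase` approximates Unicode case folding here.
    fn normalize_identifier_sketch(label: &str) -> String {
        label
            .split_whitespace() // drops leading/trailing space, collapses runs
            .collect::<Vec<_>>()
            .join(" ")
            .to_lowercase()
    }

    fn main() {
        // "  Foo\n\tBAR " and "foo bar" identify the same definition.
        assert_eq!(normalize_identifier_sketch("  Foo\n\tBAR "), "foo bar");
        assert_eq!(normalize_identifier_sketch("foo bar"), "foo bar");
    }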
diff --git a/src/content/document.rs b/src/content/document.rs
index b990ba5..f2890f3 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -13,11 +13,7 @@ use crate::parser::ParseState;
 use crate::state::{Name as StateName, State};
 use crate::subtokenize::{divide_events, subtokenize};
 use crate::tokenizer::{Container, ContainerState, Tokenizer};
-use crate::util::{
-    normalize_identifier::normalize_identifier,
-    skip,
-    slice::{Position, Slice},
-};
+use crate::util::skip;
 
 /// Phases where we can exit containers.
 #[derive(Debug, PartialEq)]
@@ -61,33 +57,9 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
     );
 
     tokenizer.flush(state, true);
 
-    let mut index = 0;
-    let mut definitions = vec![];
-
-    while index < tokenizer.events.len() {
-        let event = &tokenizer.events[index];
-
-        if event.kind == Kind::Exit && event.name == Name::DefinitionLabelString {
-            // Note: we don’t care about virtual spaces, so `as_str` is fine.
-            let id = normalize_identifier(
-                Slice::from_position(
-                    tokenizer.parse_state.bytes,
-                    &Position::from_exit_event(&tokenizer.events, index),
-                )
-                .as_str(),
-            );
-
-            if !definitions.contains(&id) {
-                definitions.push(id);
-            }
-        }
-
-        index += 1;
-    }
-
     let mut events = tokenizer.events;
-    parse_state.definitions = definitions;
+    parse_state.definitions = tokenizer.tokenize_state.definitions;
 
     while !subtokenize(&mut events, parse_state) {}
@@ -531,4 +503,9 @@ fn resolve(tokenizer: &mut Tokenizer) {
     tokenizer
         .resolvers
         .append(&mut child.resolvers.split_off(0));
+
+    tokenizer
+        .tokenize_state
+        .definitions
+        .append(&mut child.tokenize_state.definitions.split_off(0));
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 8ff19c3..fdca6c5 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -165,6 +165,9 @@ pub struct TokenizeState<'a> {
     /// Used when tokenizing [text content][crate::content::text].
     pub media_list: Vec<Media>,
 
+    /// List of defined identifiers.
+    pub definitions: Vec<String>,
+
     /// Whether to connect tokens.
     pub connect: bool,
     /// Marker.
@@ -274,18 +277,18 @@ impl<'a> Tokenizer<'a> {
                 document_data_index: None,
                 document_child_state: None,
                 document_child: None,
+                definitions: vec![],
+                end: 0,
+                label_start_stack: vec![],
+                label_start_list_loose: vec![],
                 marker: 0,
                 marker_b: 0,
                 markers: &[],
+                media_list: vec![],
                 seen: false,
                 size: 0,
                 size_b: 0,
                 size_c: 0,
-                start: 0,
-                end: 0,
-                label_start_stack: vec![],
-                label_start_list_loose: vec![],
-                media_list: vec![],
                 space_or_tab_eol_content_type: None,
                 space_or_tab_eol_connect: false,
                 space_or_tab_eol_ok: false,
@@ -295,6 +298,7 @@ impl<'a> Tokenizer<'a> {
                 space_or_tab_max: 0,
                 space_or_tab_size: 0,
                 space_or_tab_token: Name::SpaceOrTab,
+                start: 0,
                 token_1: Name::Data,
                 token_2: Name::Data,
                 token_3: Name::Data,
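Two things happen in the remaining files. In document.rs, the whole post-parse scan over the event list is deleted: `document` simply hands the accumulated `tokenize_state.definitions` to `parse_state.definitions`, and `resolve` drains each child tokenizer's list into its parent, so identifiers found while subtokenizing container content still reach the top level. The `split_off(0)`/`append` pair mirrors the `resolvers` merge directly above it: `split_off(0)` takes the child's entire vector (leaving the child empty) and `append` moves the items onto the parent. In tokenizer.rs, the only substantive change is the new `definitions: Vec<String>` field on `TokenizeState`; the rest of that hunk just reorders the initializer list alphabetically. A self-contained illustration of the drain-and-append idiom, with made-up data:

    fn main() {
        let mut parent: Vec<String> = vec!["alpha".into()];
        let mut child: Vec<String> = vec!["bravo".into(), "charlie".into()];

        // `split_off(0)` returns the child's whole contents as a fresh
        // vector and leaves the child empty; `append` then moves those
        // elements onto the end of the parent, preserving order.
        parent.append(&mut child.split_off(0));

        assert_eq!(parent, ["alpha", "bravo", "charlie"]);
        assert!(child.is_empty());
    }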

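For context on why the identifiers are collected at all (an inference from the surrounding code, not something this diff states): full references such as `[foo]` only count as references when a matching definition exists somewhere in the document, so later passes consult `parse_state.definitions`. A hypothetical lookup against that list, assuming entries are already normalized, would be no more than:

    // Hypothetical consumer of `parse_state.definitions`; `id` must have
    // been normalized the same way the definitions were (see the sketch
    // above). Not taken from the crate.
    fn is_defined(definitions: &[String], id: &str) -> bool {
        definitions.iter().any(|d| d.as_str() == id)
    }

    fn main() {
        let definitions = vec![String::from("foo bar")];
        assert!(is_defined(&definitions, "foo bar"));
        assert!(!is_defined(&definitions, "baz"));
    }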