diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/construct/attention.rs | 32 | ||||
-rw-r--r-- | src/construct/heading_atx.rs | 8 | ||||
-rw-r--r-- | src/construct/label_end.rs | 32 | ||||
-rw-r--r-- | src/construct/paragraph.rs | 10 | ||||
-rw-r--r-- | src/content/document.rs | 4 | ||||
-rw-r--r-- | src/subtokenize.rs | 71 | ||||
-rw-r--r-- | src/tokenizer.rs | 35 | ||||
-rw-r--r-- | src/util/edit_map.rs | 8 |
8 files changed, 90 insertions, 110 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs index f0de63f..ac6aecf 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -400,9 +400,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: seq_open_enter.0.clone(), index: seq_open_enter.1, - previous: None, - next: None, - content_type: None, + link: None, }, Event { event_type: EventType::Enter, @@ -413,9 +411,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: seq_open_enter.0.clone(), index: seq_open_enter.1, - previous: None, - next: None, - content_type: None, + link: None, }, Event { event_type: EventType::Exit, @@ -426,9 +422,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: seq_open_exit.0.clone(), index: seq_open_exit.1, - previous: None, - next: None, - content_type: None, + link: None, }, Event { event_type: EventType::Enter, @@ -439,9 +433,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: seq_open_exit.0.clone(), index: seq_open_exit.1, - previous: None, - next: None, - content_type: None, + link: None, }, ], ); @@ -459,9 +451,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: seq_close_enter.0.clone(), index: seq_close_enter.1, - previous: None, - next: None, - content_type: None, + link: None, }, Event { event_type: EventType::Enter, @@ -472,9 +462,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: seq_close_enter.0.clone(), index: seq_close_enter.1, - previous: None, - next: None, - content_type: None, + link: None, }, Event { event_type: EventType::Exit, @@ -485,9 +473,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: seq_close_exit.0.clone(), index: seq_close_exit.1, - previous: None, - next: None, - content_type: None, + link: None, }, Event { event_type: EventType::Exit, @@ -498,9 +484,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: seq_close_exit.0.clone(), index: seq_close_exit.1, - previous: None, - next: None, - content_type: None, + link: None, }, ], ); diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index bf3de23..0e4cda5 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -217,9 +217,7 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { token_type: Token::HeadingAtxText, point: tokenizer.events[start].point.clone(), index: tokenizer.events[start].index, - previous: None, - next: None, - content_type: None, + link: None, }], ); @@ -234,9 +232,7 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { token_type: Token::HeadingAtxText, point: tokenizer.events[end].point.clone(), index: tokenizer.events[end].index, - previous: None, - next: None, - content_type: None, + link: None, }], ); diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index bcfe343..883e314 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -636,18 +636,14 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { token_type: Token::Data, point: events[data_enter_index].point.clone(), index: events[data_enter_index].index, - previous: None, - next: None, - content_type: None, + link: None, }, Event { event_type: EventType::Exit, token_type: Token::Data, point: events[data_exit_index].point.clone(), index: events[data_exit_index].index, - previous: None, - next: None, - content_type: None, + link: None, }, ], ); @@ -690,18 +686,14 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { }, point: group_enter_event.point.clone(), index: group_enter_event.index, - previous: None, - next: None, - content_type: None, + link: None, }, Event { event_type: EventType::Enter, token_type: Token::Label, point: group_enter_event.point.clone(), index: group_enter_event.index, - previous: None, - next: None, - content_type: None, + link: None, }, ], ); @@ -717,9 +709,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { token_type: Token::LabelText, point: events[text_enter_index].point.clone(), index: events[text_enter_index].index, - previous: None, - next: None, - content_type: None, + link: None, }], ); @@ -732,9 +722,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { token_type: Token::LabelText, point: events[text_exit_index].point.clone(), index: events[text_exit_index].index, - previous: None, - next: None, - content_type: None, + link: None, }], ); } @@ -748,9 +736,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { token_type: Token::Label, point: events[label_exit_index].point.clone(), index: events[label_exit_index].index, - previous: None, - next: None, - content_type: None, + link: None, }], ); @@ -763,9 +749,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { token_type: Token::Link, point: events[group_end_index].point.clone(), index: events[group_end_index].index, - previous: None, - next: None, - content_type: None, + link: None, }], ); diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 134a4b5..3932ad8 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -118,10 +118,12 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool { data_exit.index = line_ending_index; // Link Enter:Data on the previous line to Enter:Data on this line. - let data_enter_prev = &mut tokenizer.events[exit_index - 2]; - data_enter_prev.next = Some(enter_next_index + 1); - let data_enter_next = &mut tokenizer.events[enter_next_index + 1]; - data_enter_next.previous = Some(exit_index - 2); + if let Some(link) = &mut tokenizer.events[exit_index - 2].link { + link.next = Some(enter_next_index + 1); + } + if let Some(link) = &mut tokenizer.events[enter_next_index + 1].link { + link.previous = Some(exit_index - 2); + } // Potential next start. exit_index = enter_next_index + 3; diff --git a/src/content/document.rs b/src/content/document.rs index dc5c3a0..46da830 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -465,9 +465,7 @@ fn exit_containers( // Note: positions are fixed later. point: tokenizer.point.clone(), index: tokenizer.index, - previous: None, - next: None, - content_type: None, + link: None, }); } diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 272978d..ad13022 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -37,25 +37,22 @@ pub fn link(events: &mut [Event], index: usize) { /// Link two arbitrary [`Event`][]s together. pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { - let prev = &mut events[pevious]; - assert!( - prev.content_type.is_some(), - "expected `content_type` on previous" - ); - assert_eq!(prev.event_type, EventType::Enter); - prev.next = Some(next); - - let prev_ref = &events[pevious]; - let prev_exit_ref = &events[pevious + 1]; - let curr_ref = &events[next]; - assert_eq!(prev_exit_ref.event_type, EventType::Exit); - assert_eq!(prev_exit_ref.token_type, prev_ref.token_type); - assert_eq!(curr_ref.content_type, prev_ref.content_type); - - let curr = &mut events[next]; - assert_eq!(curr.event_type, EventType::Enter); - curr.previous = Some(pevious); + assert_eq!(events[pevious].event_type, EventType::Enter); + assert_eq!(events[pevious + 1].event_type, EventType::Exit); + assert_eq!(events[pevious + 1].token_type, events[pevious].token_type); + assert_eq!(events[next].event_type, EventType::Enter); // Note: the exit of this event may not exist, so don’t check for that. + + let link_previous = events[pevious] + .link + .as_mut() + .expect("expected `link` on previous"); + let conten_type_previous = link_previous.content_type; + link_previous.next = Some(next); + let link_next = events[next].link.as_mut().expect("expected `link` on next"); + link_next.previous = Some(pevious); + + assert_eq!(conten_type_previous, link_next.content_type); } /// Parse linked events. @@ -70,18 +67,18 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { let event = &events[index]; // Find each first opening chunk. - if let Some(ref content_type) = event.content_type { + if let Some(ref link) = event.link { assert_eq!(event.event_type, EventType::Enter); // No need to enter linked events again. - if event.previous == None { + if link.previous == None { // Index into `events` pointing to a chunk. let mut link_index: Option<usize> = Some(index); // Subtokenizer. let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state); // Substate. let mut result: StateFnResult = ( - State::Fn(Box::new(if *content_type == ContentType::String { + State::Fn(Box::new(if link.content_type == ContentType::String { string } else { text @@ -92,13 +89,14 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { // Loop through links to pass them in order to the subtokenizer. while let Some(index) = link_index { let enter = &events[index]; + let link_curr = enter.link.as_ref().expect("expected link"); assert_eq!(enter.event_type, EventType::Enter); let span = span::Span { start_index: enter.index, end_index: events[index + 1].index, }; - if enter.previous != None { + if link_curr.previous != None { tokenizer.define_skip(&enter.point, enter.index); } @@ -110,10 +108,10 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { result = tokenizer.push( span::codes(&parse_state.codes, &span), func, - enter.next == None, + link_curr.next == None, ); assert!(result.1.is_none(), "expected no remainder"); - link_index = enter.next; + link_index = link_curr.next; } // Now, loop through all subevents to figure out which parts @@ -133,10 +131,10 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { { slices.push((link_index, slice_start)); slice_start = subindex; - link_index = events[link_index].next.unwrap(); + link_index = events[link_index].link.as_ref().unwrap().next.unwrap(); } - if subevent.content_type.is_some() { + if subevent.link.is_some() { // Need to call `subtokenize` again. done = false; } @@ -145,15 +143,18 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool { // its index to account for the shifted events. // If it points to a next event, we also change the next event’s // reference back to *this* event. - if let Some(next) = subevent.next { - // The `index` in `events` where the current link is, - // minus 2 events (the enter and exit) for each removed - // link. - let shift = link_index - (slices.len() * 2); - subevent.next = Some(next + shift); - let next_ev = &mut tokenizer.events[next]; - let previous = next_ev.previous.unwrap(); - next_ev.previous = Some(previous + shift); + if let Some(sublink_curr) = &mut subevent.link { + if let Some(next) = sublink_curr.next { + // The `index` in `events` where the current link is, + // minus 2 events (the enter and exit) for each removed + // link. + let shift = link_index - (slices.len() * 2); + sublink_curr.next = sublink_curr.next.map(|next| next + shift); + let next_ev = &mut tokenizer.events[next]; + let sublink_next = next_ev.link.as_mut().unwrap(); + sublink_next.previous = + sublink_next.previous.map(|previous| previous + shift); + } } subindex += 1; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 5d03c92..b745ac8 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -66,6 +66,14 @@ pub enum EventType { Exit, } +/// A link to another event. +#[derive(Debug, Clone)] +pub struct Link { + pub previous: Option<usize>, + pub next: Option<usize>, + pub content_type: ContentType, +} + /// Something semantic happening somewhere. #[derive(Debug, Clone)] pub struct Event { @@ -73,9 +81,7 @@ pub struct Event { pub token_type: Token, pub point: Point, pub index: usize, - pub previous: Option<usize>, - pub next: Option<usize>, - pub content_type: Option<ContentType>, + pub link: Option<Link>, } /// The essence of the state machine are functions: `StateFn`. @@ -357,19 +363,28 @@ impl<'a> Tokenizer<'a> { /// Mark the start of a semantic label. pub fn enter(&mut self, token_type: Token) { - self.enter_with_content(token_type, None); + self.enter_with_link(token_type, None); + } + + pub fn enter_with_content(&mut self, token_type: Token, content_type_opt: Option<ContentType>) { + self.enter_with_link( + token_type, + content_type_opt.map(|content_type| Link { + content_type, + previous: None, + next: None, + }), + ); } - pub fn enter_with_content(&mut self, token_type: Token, content_type: Option<ContentType>) { + pub fn enter_with_link(&mut self, token_type: Token, link: Option<Link>) { log::debug!("enter: `{:?}` ({:?})", token_type, self.point); self.events.push(Event { event_type: EventType::Enter, token_type: token_type.clone(), point: self.point.clone(), index: self.index, - previous: None, - next: None, - content_type, + link, }); self.stack.push(token_type); } @@ -423,9 +438,7 @@ impl<'a> Tokenizer<'a> { token_type, point, index, - previous: None, - next: None, - content_type: None, + link: None, }); } diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index b1b5064..6085f18 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -35,9 +35,11 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) { let mut index = 0; while index < events.len() { - let event = &mut events[index]; - event.previous = event.previous.map(map); - event.next = event.next.map(map); + if let Some(link) = &mut events[index].link { + link.previous = link.previous.map(map); + link.next = link.next.map(map); + } + index += 1; } } |