Diffstat (limited to '')
-rw-r--r--   src/construct/attention.rs     32
-rw-r--r--   src/construct/heading_atx.rs    8
-rw-r--r--   src/construct/label_end.rs     32
-rw-r--r--   src/construct/paragraph.rs     10
-rw-r--r--   src/content/document.rs         4
-rw-r--r--   src/subtokenize.rs             71
-rw-r--r--   src/tokenizer.rs               35
-rw-r--r--   src/util/edit_map.rs            8
8 files changed, 90 insertions(+), 110 deletions(-)
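
In short: the three link-related fields that every `Event` carried (`previous`, `next`, `content_type`) are collapsed into a single optional `Link` struct, so only events that actually participate in a chain pay for the bookkeeping, and a present link always has a definite `content_type`. A minimal before/after sketch, using simplified stand-in types rather than the crate's full definitions:

```rust
#![allow(dead_code)]

// Stand-in: the real `Event` also carries `event_type`, `token_type`,
// `point`, and `index`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ContentType {
    String,
    Text,
}

// Before: three fields, almost always all `None`.
struct EventBefore {
    previous: Option<usize>,
    next: Option<usize>,
    content_type: Option<ContentType>,
}

// After: one optional field; a present `Link` always knows its content type.
struct Link {
    previous: Option<usize>,
    next: Option<usize>,
    content_type: ContentType,
}

struct EventAfter {
    link: Option<Link>,
}

fn main() {
    // An unlinked event shrinks from three `None`s to one.
    let _before = EventBefore { previous: None, next: None, content_type: None };
    let _after = EventAfter { link: None };
    // A linked event now guarantees a content type.
    let _linked = EventAfter {
        link: Some(Link { previous: None, next: Some(2), content_type: ContentType::Text }),
    };
}
```

The constructs below (`attention.rs`, `heading_atx.rs`, `label_end.rs`, `document.rs`) change mechanically: every synthesized event swaps its three `None` fields for `link: None`.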
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index f0de63f..ac6aecf 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -400,9 +400,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: seq_open_enter.0.clone(),
index: seq_open_enter.1,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
Event {
event_type: EventType::Enter,
@@ -413,9 +411,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: seq_open_enter.0.clone(),
index: seq_open_enter.1,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
Event {
event_type: EventType::Exit,
@@ -426,9 +422,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: seq_open_exit.0.clone(),
index: seq_open_exit.1,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
Event {
event_type: EventType::Enter,
@@ -439,9 +433,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: seq_open_exit.0.clone(),
index: seq_open_exit.1,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
],
);
@@ -459,9 +451,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: seq_close_enter.0.clone(),
index: seq_close_enter.1,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
Event {
event_type: EventType::Enter,
@@ -472,9 +462,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: seq_close_enter.0.clone(),
index: seq_close_enter.1,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
Event {
event_type: EventType::Exit,
@@ -485,9 +473,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: seq_close_exit.0.clone(),
index: seq_close_exit.1,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
Event {
event_type: EventType::Exit,
@@ -498,9 +484,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: seq_close_exit.0.clone(),
index: seq_close_exit.1,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
],
);
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index bf3de23..0e4cda5 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -217,9 +217,7 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
token_type: Token::HeadingAtxText,
point: tokenizer.events[start].point.clone(),
index: tokenizer.events[start].index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
}],
);
@@ -234,9 +232,7 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
token_type: Token::HeadingAtxText,
point: tokenizer.events[end].point.clone(),
index: tokenizer.events[end].index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
}],
);
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index bcfe343..883e314 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -636,18 +636,14 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
token_type: Token::Data,
point: events[data_enter_index].point.clone(),
index: events[data_enter_index].index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
Event {
event_type: EventType::Exit,
token_type: Token::Data,
point: events[data_exit_index].point.clone(),
index: events[data_exit_index].index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
],
);
@@ -690,18 +686,14 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
},
point: group_enter_event.point.clone(),
index: group_enter_event.index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
Event {
event_type: EventType::Enter,
token_type: Token::Label,
point: group_enter_event.point.clone(),
index: group_enter_event.index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
},
],
);
@@ -717,9 +709,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
token_type: Token::LabelText,
point: events[text_enter_index].point.clone(),
index: events[text_enter_index].index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
}],
);
@@ -732,9 +722,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
token_type: Token::LabelText,
point: events[text_exit_index].point.clone(),
index: events[text_exit_index].index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
}],
);
}
@@ -748,9 +736,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
token_type: Token::Label,
point: events[label_exit_index].point.clone(),
index: events[label_exit_index].index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
}],
);
@@ -763,9 +749,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
token_type: Token::Link,
point: events[group_end_index].point.clone(),
index: events[group_end_index].index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
}],
);
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 134a4b5..3932ad8 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -118,10 +118,12 @@ pub fn resolve(tokenizer: &mut Tokenizer, map: &mut EditMap) -> bool {
data_exit.index = line_ending_index;
// Link Enter:Data on the previous line to Enter:Data on this line.
- let data_enter_prev = &mut tokenizer.events[exit_index - 2];
- data_enter_prev.next = Some(enter_next_index + 1);
- let data_enter_next = &mut tokenizer.events[enter_next_index + 1];
- data_enter_next.previous = Some(exit_index - 2);
+ if let Some(link) = &mut tokenizer.events[exit_index - 2].link {
+ link.next = Some(enter_next_index + 1);
+ }
+ if let Some(link) = &mut tokenizer.events[enter_next_index + 1].link {
+ link.previous = Some(exit_index - 2);
+ }
// Potential next start.
exit_index = enter_next_index + 3;
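
The paragraph resolver shows the call-site effect of the new shape: rather than assigning `previous`/`next` directly on the events, it mutates through the optional `Link`, skipping events that carry none. A runnable sketch of that guarded pattern, again with stand-in types:

```rust
#![allow(dead_code)]

#[derive(Debug, Clone, Copy, PartialEq)]
enum ContentType {
    Text,
}

#[derive(Debug)]
struct Link {
    previous: Option<usize>,
    next: Option<usize>,
    content_type: ContentType,
}

#[derive(Debug)]
struct Event {
    link: Option<Link>,
}

fn main() {
    let mut events = vec![
        Event { link: Some(Link { previous: None, next: None, content_type: ContentType::Text }) },
        Event { link: Some(Link { previous: None, next: None, content_type: ContentType::Text }) },
    ];
    // Chain event 0 forward to event 1, touching only events that have a
    // link, mirroring the `if let Some(link)` guards in `paragraph.rs` above.
    if let Some(link) = &mut events[0].link {
        link.next = Some(1);
    }
    if let Some(link) = &mut events[1].link {
        link.previous = Some(0);
    }
    println!("{:?}", events);
}
```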
diff --git a/src/content/document.rs b/src/content/document.rs
index dc5c3a0..46da830 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -465,9 +465,7 @@ fn exit_containers(
// Note: positions are fixed later.
point: tokenizer.point.clone(),
index: tokenizer.index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
});
}
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 272978d..ad13022 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -37,25 +37,22 @@ pub fn link(events: &mut [Event], index: usize) {
/// Link two arbitrary [`Event`][]s together.
pub fn link_to(events: &mut [Event], previous: usize, next: usize) {
- let prev = &mut events[previous];
- assert!(
- prev.content_type.is_some(),
- "expected `content_type` on previous"
- );
- assert_eq!(prev.event_type, EventType::Enter);
- prev.next = Some(next);
-
- let prev_ref = &events[previous];
- let prev_exit_ref = &events[previous + 1];
- let curr_ref = &events[next];
- assert_eq!(prev_exit_ref.event_type, EventType::Exit);
- assert_eq!(prev_exit_ref.token_type, prev_ref.token_type);
- assert_eq!(curr_ref.content_type, prev_ref.content_type);
-
- let curr = &mut events[next];
- assert_eq!(curr.event_type, EventType::Enter);
- curr.previous = Some(previous);
+ assert_eq!(events[previous].event_type, EventType::Enter);
+ assert_eq!(events[previous + 1].event_type, EventType::Exit);
+ assert_eq!(events[previous + 1].token_type, events[previous].token_type);
+ assert_eq!(events[next].event_type, EventType::Enter);
// Note: the exit of this event may not exist, so don’t check for that.
+
+ let link_previous = events[previous]
+ .link
+ .as_mut()
+ .expect("expected `link` on previous");
+ let content_type_previous = link_previous.content_type;
+ link_previous.next = Some(next);
+ let link_next = events[next].link.as_mut().expect("expected `link` on next");
+ link_next.previous = Some(previous);
+
+ assert_eq!(content_type_previous, link_next.content_type);
}
/// Parse linked events.
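
The rewritten `link_to` is also a small borrow-checker exercise: two `&mut` borrows into `events` may not overlap, so the previous event's `content_type` is copied out while the first borrow is live, and the equality check moves to the end. A self-contained sketch of that flow (stand-in types; the event-type assertions are elided):

```rust
#![allow(dead_code)]

#[derive(Debug, Clone, Copy, PartialEq)]
enum ContentType {
    Text,
}

#[derive(Debug)]
struct Link {
    previous: Option<usize>,
    next: Option<usize>,
    content_type: ContentType,
}

#[derive(Debug)]
struct Event {
    link: Option<Link>,
}

fn link_to(events: &mut [Event], previous: usize, next: usize) {
    // First mutable borrow: set `next` and copy the content type out …
    let link_previous = events[previous]
        .link
        .as_mut()
        .expect("expected `link` on previous");
    let content_type_previous = link_previous.content_type;
    link_previous.next = Some(next);
    // … so the second mutable borrow can begin after the first one ends.
    let link_next = events[next].link.as_mut().expect("expected `link` on next");
    link_next.previous = Some(previous);
    assert_eq!(content_type_previous, link_next.content_type);
}

fn main() {
    let mut events = vec![
        Event { link: Some(Link { previous: None, next: None, content_type: ContentType::Text }) },
        Event { link: Some(Link { previous: None, next: None, content_type: ContentType::Text }) },
    ];
    link_to(&mut events, 0, 1);
    assert_eq!(events[0].link.as_ref().unwrap().next, Some(1));
    println!("{:?}", events);
}
```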
@@ -70,18 +67,18 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
let event = &events[index];
// Find each first opening chunk.
- if let Some(ref content_type) = event.content_type {
+ if let Some(ref link) = event.link {
assert_eq!(event.event_type, EventType::Enter);
// No need to enter linked events again.
- if event.previous == None {
+ if link.previous == None {
// Index into `events` pointing to a chunk.
let mut link_index: Option<usize> = Some(index);
// Subtokenizer.
let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
// Substate.
let mut result: StateFnResult = (
- State::Fn(Box::new(if *content_type == ContentType::String {
+ State::Fn(Box::new(if link.content_type == ContentType::String {
string
} else {
text
@@ -92,13 +89,14 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
// Loop through links to pass them in order to the subtokenizer.
while let Some(index) = link_index {
let enter = &events[index];
+ let link_curr = enter.link.as_ref().expect("expected link");
assert_eq!(enter.event_type, EventType::Enter);
let span = span::Span {
start_index: enter.index,
end_index: events[index + 1].index,
};
- if enter.previous != None {
+ if link_curr.previous != None {
tokenizer.define_skip(&enter.point, enter.index);
}
@@ -110,10 +108,10 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
result = tokenizer.push(
span::codes(&parse_state.codes, &span),
func,
- enter.next == None,
+ link_curr.next == None,
);
assert!(result.1.is_none(), "expected no remainder");
- link_index = enter.next;
+ link_index = link_curr.next;
}
// Now, loop through all subevents to figure out which parts
@@ -133,10 +131,10 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
{
slices.push((link_index, slice_start));
slice_start = subindex;
- link_index = events[link_index].next.unwrap();
+ link_index = events[link_index].link.as_ref().unwrap().next.unwrap();
}
- if subevent.content_type.is_some() {
+ if subevent.link.is_some() {
// Need to call `subtokenize` again.
done = false;
}
@@ -145,15 +143,18 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> bool {
// its index to account for the shifted events.
// If it points to a next event, we also change the next event’s
// reference back to *this* event.
- if let Some(next) = subevent.next {
- // The `index` in `events` where the current link is,
- // minus 2 events (the enter and exit) for each removed
- // link.
- let shift = link_index - (slices.len() * 2);
- subevent.next = Some(next + shift);
- let next_ev = &mut tokenizer.events[next];
- let previous = next_ev.previous.unwrap();
- next_ev.previous = Some(previous + shift);
+ if let Some(sublink_curr) = &mut subevent.link {
+ if let Some(next) = sublink_curr.next {
+ // The `index` in `events` where the current link is,
+ // minus 2 events (the enter and exit) for each removed
+ // link.
+ let shift = link_index - (slices.len() * 2);
+ sublink_curr.next = sublink_curr.next.map(|next| next + shift);
+ let next_ev = &mut tokenizer.events[next];
+ let sublink_next = next_ev.link.as_mut().unwrap();
+ sublink_next.previous =
+ sublink_next.previous.map(|previous| previous + shift);
+ }
}
subindex += 1;
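
The driver loop above walks each chain head to tail: a chunk is the head of its chain when `link.previous` is `None`, and the walk follows `link.next` until it runs out. A toy walk over a prebuilt chain (stand-in types; the real loop feeds each span into a nested tokenizer and splices the results back):

```rust
#![allow(dead_code)]

#[derive(Debug, Clone, Copy, PartialEq)]
enum ContentType {
    Text,
}

#[derive(Debug)]
struct Link {
    previous: Option<usize>,
    next: Option<usize>,
    content_type: ContentType,
}

#[derive(Debug)]
struct Event {
    link: Option<Link>,
}

fn main() {
    // Three chunks of one content type, chained 0 -> 2 -> 4; enters sit at
    // even indices, and the odd indices (exits) carry no link.
    let events = vec![
        Event { link: Some(Link { previous: None, next: Some(2), content_type: ContentType::Text }) },
        Event { link: None },
        Event { link: Some(Link { previous: Some(0), next: Some(4), content_type: ContentType::Text }) },
        Event { link: None },
        Event { link: Some(Link { previous: Some(2), next: None, content_type: ContentType::Text }) },
        Event { link: None },
    ];

    // Only a chain head (`previous == None`) starts a walk.
    let mut link_index = (0..events.len())
        .find(|&i| matches!(&events[i].link, Some(l) if l.previous.is_none()));
    while let Some(index) = link_index {
        let link = events[index].link.as_ref().expect("expected link");
        println!("chunk at {}, last: {}", index, link.next.is_none());
        link_index = link.next;
    }
}
```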
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 5d03c92..b745ac8 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -66,6 +66,14 @@ pub enum EventType {
Exit,
}
+/// A link to another event.
+#[derive(Debug, Clone)]
+pub struct Link {
+ pub previous: Option<usize>,
+ pub next: Option<usize>,
+ pub content_type: ContentType,
+}
+
/// Something semantic happening somewhere.
#[derive(Debug, Clone)]
pub struct Event {
@@ -73,9 +81,7 @@ pub struct Event {
pub token_type: Token,
pub point: Point,
pub index: usize,
- pub previous: Option<usize>,
- pub next: Option<usize>,
- pub content_type: Option<ContentType>,
+ pub link: Option<Link>,
}
/// The essence of the state machine are functions: `StateFn`.
@@ -357,19 +363,28 @@ impl<'a> Tokenizer<'a> {
/// Mark the start of a semantic label.
pub fn enter(&mut self, token_type: Token) {
- self.enter_with_content(token_type, None);
+ self.enter_with_link(token_type, None);
+ }
+
+ pub fn enter_with_content(&mut self, token_type: Token, content_type_opt: Option<ContentType>) {
+ self.enter_with_link(
+ token_type,
+ content_type_opt.map(|content_type| Link {
+ content_type,
+ previous: None,
+ next: None,
+ }),
+ );
}
- pub fn enter_with_content(&mut self, token_type: Token, content_type: Option<ContentType>) {
+ pub fn enter_with_link(&mut self, token_type: Token, link: Option<Link>) {
log::debug!("enter: `{:?}` ({:?})", token_type, self.point);
self.events.push(Event {
event_type: EventType::Enter,
token_type: token_type.clone(),
point: self.point.clone(),
index: self.index,
- previous: None,
- next: None,
- content_type,
+ link,
});
self.stack.push(token_type);
}
@@ -423,9 +438,7 @@ impl<'a> Tokenizer<'a> {
token_type,
point,
index,
- previous: None,
- next: None,
- content_type: None,
+ link: None,
});
}
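
`enter_with_content` is kept as a thin wrapper: an optional `ContentType` is mapped into an optional `Link` with empty `previous`/`next`, and `enter_with_link` does the rest. A sketch of just that mapping, with stand-in types and a hypothetical free function in place of the method:

```rust
#![allow(dead_code)]

#[derive(Debug, Clone, Copy)]
enum ContentType {
    String,
    Text,
}

#[derive(Debug)]
struct Link {
    previous: Option<usize>,
    next: Option<usize>,
    content_type: ContentType,
}

// Hypothetical free-function version of the mapping inside
// `enter_with_content` above.
fn to_link(content_type_opt: Option<ContentType>) -> Option<Link> {
    content_type_opt.map(|content_type| Link {
        content_type,
        previous: None,
        next: None,
    })
}

fn main() {
    assert!(to_link(None).is_none());
    let link = to_link(Some(ContentType::Text)).expect("mapped to a link");
    assert!(link.previous.is_none() && link.next.is_none());
    println!("{:?}", link);
}
```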
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index b1b5064..6085f18 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -35,9 +35,11 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, usize, usize)]) {
let mut index = 0;
while index < events.len() {
- let event = &mut events[index];
- event.previous = event.previous.map(map);
- event.next = event.next.map(map);
+ if let Some(link) = &mut events[index].link {
+ link.previous = link.previous.map(map);
+ link.next = link.next.map(map);
+ }
+
index += 1;
}
}
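
Finally, `shift_links` only remaps indices on events that actually carry a link, using `Option::map` for both directions instead of mapping two always-present fields. A runnable sketch with stand-in types and a toy remapping closure:

```rust
#![allow(dead_code)]

#[derive(Debug, Clone, Copy)]
enum ContentType {
    Text,
}

#[derive(Debug)]
struct Link {
    previous: Option<usize>,
    next: Option<usize>,
    content_type: ContentType,
}

#[derive(Debug)]
struct Event {
    link: Option<Link>,
}

fn main() {
    // Toy remap: pretend two events were inserted at the front, so every
    // linked index moves forward by 2 (the real `map` is built from jumps).
    let map = |index: usize| index + 2;
    let mut events = vec![
        Event { link: Some(Link { previous: None, next: Some(1), content_type: ContentType::Text }) },
        Event { link: None }, // untouched: nothing to remap
    ];
    let mut index = 0;
    while index < events.len() {
        if let Some(link) = &mut events[index].link {
            link.previous = link.previous.map(map);
            link.next = link.next.map(map);
        }
        index += 1;
    }
    assert_eq!(events[0].link.as_ref().unwrap().next, Some(3));
    println!("{:?}", events);
}
```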