aboutsummaryrefslogtreecommitdiffstats
path: root/src/subtokenize.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/subtokenize.rs')
-rw-r--r-- src/subtokenize.rs 12
1 files changed, 9 insertions, 3 deletions
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index d72eb69..ee826b8 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -36,10 +36,10 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
let mut result: StateFnResult = (
State::Fn(Box::new(if event.token_type == TokenType::ChunkContent {
content
- } else if event.token_type == TokenType::ChunkText {
- text
- } else {
+ } else if event.token_type == TokenType::ChunkString {
string
+ } else {
+ text
})),
None,
);
@@ -49,6 +49,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
// Loop through chunks to pass them in order to the subtokenizer.
while let Some(index_ptr) = index_opt {
let enter = &events[index_ptr];
+ assert_eq!(enter.event_type, EventType::Enter);
let span = Span {
start_index: enter.index,
end_index: events[index_ptr + 1].index,
@@ -119,6 +120,11 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
// from each slice and slices from events?
let mut index = events.len() - 1;
+ // To do: this is broken, because it can inject linked events, which point
+ // to their links through indices, and injecting throws off all indices.
+ // We should try walking from front to back instead, keeping a running
+ // count of how far the indices have shifted.
+ // It’s a bit complex but should work?
while index > 0 {
let slice_opt = link_to_info.get(&index);