aboutsummaryrefslogtreecommitdiffstats
path: root/src/subtokenize.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-10 16:47:43 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-10 16:47:43 +0200
commit17f4eec55ad0a5f74aedbcff6c2f0119ad52e584 (patch)
tree1839c796de977421456d1b9006f2f2c1e23cf809 /src/subtokenize.rs
parent5133042973f31a3992f216e591d840bb491bfd45 (diff)
downloadmarkdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.tar.gz
markdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.tar.bz2
markdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.zip
Add text content type
* Add character reference and character escapes in text
* Add recursive subtokenization
Diffstat (limited to 'src/subtokenize.rs')
-rw-r--r--src/subtokenize.rs14
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index adf843f..d72eb69 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -1,37 +1,43 @@
use crate::content::content::start as content;
use crate::content::string::start as string;
+use crate::content::text::start as text;
use crate::tokenizer::{
Code, Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer,
};
use crate::util::{slice_codes, Span};
use std::collections::HashMap;
-pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> Vec<Event> {
+pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
let mut events = events;
let mut index = 0;
// Map of first chunks to their tokenizer.
let mut head_to_tokenizer: HashMap<usize, Tokenizer> = HashMap::new();
// Map of chunks to their head and corresponding range of events.
let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new();
+ let mut done = true;
while index < events.len() {
let event = &events[index];
// Find each first opening chunk.
if (event.token_type == TokenType::ChunkString
- || event.token_type == TokenType::ContentChunk) &&
+ || event.token_type == TokenType::ChunkText
+ || event.token_type == TokenType::ChunkContent) &&
event.event_type == EventType::Enter &&
// No need to enter linked events again.
event.previous == None
{
+ done = false;
// Index into `events` pointing to a chunk.
let mut index_opt: Option<usize> = Some(index);
// Subtokenizer.
let mut tokenizer = Tokenizer::new(event.point.clone(), event.index);
// Substate.
let mut result: StateFnResult = (
- State::Fn(Box::new(if event.token_type == TokenType::ContentChunk {
+ State::Fn(Box::new(if event.token_type == TokenType::ChunkContent {
content
+ } else if event.token_type == TokenType::ChunkText {
+ text
} else {
string
})),
@@ -129,5 +135,5 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> Vec<Event> {
index -= 1;
}
- events
+ (events, done)
}