diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-05 13:31:46 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-05 13:31:46 +0200 |
commit | f2d62d446a7b1e03ef14b580e2aca0fad8aa23ef (patch) | |
tree | b2a837d99cb0e4a4de0bfd82051e10a2db8c6456 /src | |
parent | fd860a975b84da9a79abfa247787e6adbd5ea34c (diff) | |
download | markdown-rs-f2d62d446a7b1e03ef14b580e2aca0fad8aa23ef.tar.gz markdown-rs-f2d62d446a7b1e03ef14b580e2aca0fad8aa23ef.tar.bz2 markdown-rs-f2d62d446a7b1e03ef14b580e2aca0fad8aa23ef.zip |
Refactor to do some to dos
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler.rs | 2 | ||||
-rw-r--r-- | src/construct/label_end.rs | 31 | ||||
-rw-r--r-- | src/construct/paragraph.rs | 3 | ||||
-rw-r--r-- | src/lib.rs | 4 | ||||
-rw-r--r-- | src/parser.rs | 5 | ||||
-rw-r--r-- | src/subtokenize.rs | 5 | ||||
-rw-r--r-- | src/tokenizer.rs | 148 | ||||
-rw-r--r-- | src/util/edit_map.rs | 6 |
8 files changed, 156 insertions, 48 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 2c6fe68..7e47f95 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -544,7 +544,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { let mut definition_inside = false; // Handle all definitions first. - // We have to do two passes because we need to compile the events in + // We must do two passes because we need to compile the events in // definitions which come after references already. // // To speed things up, we collect the places we can jump over for the diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 6f747db..24ac3a5 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -531,9 +531,7 @@ fn nok(tokenizer: &mut Tokenizer, _code: Code, label_start_index: usize) -> Stat .label_start_stack .get_mut(label_start_index) .unwrap(); - println!("just balanced braces: {:?}", label_start); label_start.balanced = true; - // To do: pop things off the list? (State::Nok, None) } @@ -708,22 +706,19 @@ fn full_reference_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult } } - // Always found, otherwise we don’t get here. - let start = start.unwrap(); - let end = end.unwrap(); - - let id = normalize_identifier(&serialize( - &tokenizer.parse_state.codes, - &Span { - start_index: start, - end_index: end, - }, - false, - )); - let defined = tokenizer.parse_state.definitions.contains(&id); - // To do: set `id` on the media somehow? - - if defined { + if tokenizer + .parse_state + .definitions + .contains(&normalize_identifier(&serialize( + &tokenizer.parse_state.codes, + &Span { + // Always found, otherwise we don’t get here. + start_index: start.unwrap(), + end_index: end.unwrap(), + }, + false, + ))) + { (State::Ok, Some(vec![code])) } else { (State::Nok, None) diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 5ec278e..4f5e662 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -93,7 +93,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { // Enter:Paragraph let mut enter_next_index = exit_index + 3; - // To do: assert that `LineEnding` between? + // Find future `Paragraphs`. + // There will be `LineEnding` between. while enter_next_index < len && tokenizer.events[enter_next_index].token_type == TokenType::Paragraph { @@ -51,6 +51,6 @@ pub fn micromark(value: &str) -> String { /// ``` #[must_use] pub fn micromark_with_options(value: &str, options: &Options) -> String { - let (events, codes) = parse(value); - compile(&events, &codes, options) + let (events, result) = parse(value); + compile(&events, &result.codes, options) } diff --git a/src/parser.rs b/src/parser.rs index 32689d6..69dd355 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -21,7 +21,7 @@ pub struct ParseState { /// Turn a string of markdown into events. /// /// Passes the codes back so the compiler can access the source. -pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) { +pub fn parse(value: &str) -> (Vec<Event>, ParseState) { let mut parse_state = ParseState { codes: parse_codes(value), definitions: HashSet::new(), @@ -37,6 +37,5 @@ pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) { 0, ); - // To do: pass whole `parse_state` back? - (events, parse_state.codes) + (events, parse_state) } diff --git a/src/subtokenize.rs b/src/subtokenize.rs index f3e9ae0..6b0460c 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -36,7 +36,7 @@ pub fn link(events: &mut [Event], index: usize) { link_to(events, index - 2, index); } -/// To do +/// Link two arbitrary [`Event`][]s together. pub fn link_to(events: &mut [Event], pevious: usize, next: usize) { let prev = &mut events[pevious]; assert!( @@ -111,7 +111,7 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve ends.push(span.end_index); if enter.previous != None { - tokenizer.define_skip(&enter.point, span.start_index); + tokenizer.define_skip(&enter.point); } let func: Box<StateFn> = match result.0 { @@ -143,7 +143,6 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve // Find the first event that starts after the end we’re looking // for. - // To do: is this logic correct? if subevent.event_type == EventType::Enter && subevent.index >= ends[end_index] { let link = index_opt.unwrap(); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f0f9ff0..34d6e9e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -15,8 +15,6 @@ use crate::parser::ParseState; use std::collections::HashMap; /// Semantic label of a span. -// To do: figure out how to share this so extensions can add their own stuff, -// though perhaps that’s impossible and we should inline all extensions? #[derive(Debug, Clone, PartialEq, Hash, Eq)] pub enum TokenType { /// Whole autolink. @@ -1590,14 +1588,119 @@ pub enum TokenType { /// ^ ^ ^ /// ``` ThematicBreakSequence, + /// Strong. + /// + /// ## Info + /// + /// * **Context**: + /// [text content][crate::content::text] + /// * **Content model**: + /// [`StrongSequence`][TokenType::StrongSequence], + /// [`StrongText`][TokenType::StrongText] + /// * **Construct**: + /// [`attention`][crate::construct::attention] + /// + /// ## Example + /// + /// ```markdown + /// > | **a** + /// ^^^^^ + /// ``` Strong, + /// Strong sequence. + /// + /// ## Info + /// + /// * **Context**: + /// [`Strong`][TokenType::Strong] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`attention`][crate::construct::attention] + /// + /// ## Example + /// + /// ```markdown + /// > | **a** + /// ^^ ^^ + /// ``` StrongSequence, + /// Strong text. + /// + /// ## Info + /// + /// * **Context**: + /// [`Strong`][TokenType::Strong] + /// * **Content model**: + /// [text content][crate::content::text] + /// * **Construct**: + /// [`attention`][crate::construct::attention] + /// + /// ## Example + /// + /// ```markdown + /// > | **a** + /// ^ + /// ``` StrongText, + /// Emphasis. + /// + /// ## Info + /// + /// * **Context**: + /// [text content][crate::content::text] + /// * **Content model**: + /// [`EmphasisSequence`][TokenType::EmphasisSequence], + /// [`EmphasisText`][TokenType::EmphasisText] + /// * **Construct**: + /// [`attention`][crate::construct::attention] + /// + /// ## Example + /// + /// ```markdown + /// > | *a* + /// ^^^ + /// ``` Emphasis, + /// Emphasis sequence. + /// + /// ## Info + /// + /// * **Context**: + /// [`Emphasis`][TokenType::Emphasis] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`attention`][crate::construct::attention] + /// + /// ## Example + /// + /// ```markdown + /// > | *a* + /// ^ ^ + /// ``` EmphasisSequence, + /// Emphasis text. + /// + /// ## Info + /// + /// * **Context**: + /// [`Emphasis`][TokenType::Emphasis] + /// * **Content model**: + /// [text content][crate::content::text] + /// * **Construct**: + /// [`attention`][crate::construct::attention] + /// + /// ## Example + /// + /// ```markdown + /// > | *a* + /// ^ + /// ``` EmphasisText, - // To do: this is removed. - // Should it reuse something e.g., emphasis? Data? + /// Attention sequence. + /// + /// > 👉 **Note**: this is used while parsing but compiled away. AttentionSequence, } @@ -1759,19 +1862,29 @@ pub struct Tokenizer<'a> { index: usize, /// Current relative and absolute place in the file. point: Point, - /// To do. + /// List of attached resolvers, which will be called when done feeding, + /// to clean events. + resolvers: Vec<Box<Resolver>>, + /// List of names associated with attached resolvers. + resolver_ids: Vec<String>, + /// Shared parsing state across tokenizers. pub parse_state: &'a ParseState, - /// To do. + /// Stack of label (start) that could form images and links. + /// + /// Used when tokenizing [text content][crate::content::text]. pub label_start_stack: Vec<LabelStart>, - /// To do. + /// Stack of label (start) that cannot form images and links. + /// + /// Used when tokenizing [text content][crate::content::text]. pub label_start_list_loose: Vec<LabelStart>, - /// To do. - pub interrupt: bool, - /// To do. + /// Stack of images and links. + /// + /// Used when tokenizing [text content][crate::content::text]. pub media_list: Vec<Media>, - /// To do. - resolvers: Vec<Box<Resolver>>, - resolver_ids: Vec<String>, + /// Whether we would be interrupting something. + /// + /// Used when tokenizing [flow content][crate::content::flow]. + pub interrupt: bool, } impl<'a> Tokenizer<'a> { @@ -1797,7 +1910,7 @@ impl<'a> Tokenizer<'a> { } } - /// To do. + /// Register a resolver. pub fn register_resolver(&mut self, id: String, resolver: Box<Resolver>) { if !self.resolver_ids.contains(&id) { self.resolver_ids.push(id); @@ -1805,6 +1918,7 @@ impl<'a> Tokenizer<'a> { } } + /// Register a resolver, before others. pub fn register_resolver_before(&mut self, id: String, resolver: Box<Resolver>) { if !self.resolver_ids.contains(&id) { self.resolver_ids.push(id); @@ -1823,12 +1937,10 @@ impl<'a> Tokenizer<'a> { /// /// This defines how much columns are increased when consuming a line /// ending. - /// `index` is currently not used (yet). - // To do: remove `index` as a parameter if not needed. - pub fn define_skip(&mut self, point: &Point, index: usize) { + pub fn define_skip(&mut self, point: &Point) { self.column_start.insert(point.line, point.column); self.account_for_potential_skip(); - log::debug!("position: define skip: `{:?}` ({:?})", point, index); + log::debug!("position: define skip: `{:?}`", point); } /// Increment the current positional info if we’re right after a line diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index eba667d..ebc20b7 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -67,6 +67,7 @@ impl EditMap { pub fn add(&mut self, index: usize, remove: usize, add: Vec<Event>) { add_impl(self, index, remove, add, false); } + /// Create an edit: but insert `add` before existing additions. pub fn add_before(&mut self, index: usize, remove: usize, add: Vec<Event>) { add_impl(self, index, remove, add, true); } @@ -134,7 +135,7 @@ impl EditMap { } } -/// To do. +/// Create an edit. fn add_impl( edit_map: &mut EditMap, index: usize, @@ -145,9 +146,10 @@ fn add_impl( assert!(!edit_map.consumed, "cannot add after consuming"); if let Some((curr_remove, mut curr_add)) = edit_map.map.remove(&index) { - // To do: these might have to be split in several chunks instead + // To do: these might have to be split into several chunks instead // of one, if links in `curr_add` are supported. remove += curr_remove; + if before { add.append(&mut curr_add); } else { |