about summary refs log tree commit diff stats
path: root/src/tokenizer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- src/tokenizer.rs 111
1 files changed, 106 insertions, 5 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7b71308..a692a4d 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -15,6 +15,7 @@
use std::collections::HashMap;
use crate::constant::TAB_SIZE;
+use crate::parser::ParseState;
/// Semantic label of a span.
// To do: figure out how to share this so extensions can add their own stuff,
@@ -1073,6 +1074,32 @@ pub enum TokenType {
/// ^^^
/// ```
HtmlTextData,
+ /// To do,
+ LabelImage,
+ /// To do,
+ LabelImageMarker,
+ /// To do,
+ LabelLink,
+ /// To do,
+ LabelMarker,
+ LabelEnd,
+ Resource,
+ ResourceMarker,
+ ResourceDestination,
+ ResourceDestinationLiteral,
+ ResourceDestinationLiteralMarker,
+ ResourceDestinationRaw,
+ ResourceDestinationString,
+ ResourceTitle,
+ ResourceTitleMarker,
+ ResourceTitleString,
+ Reference,
+ ReferenceMarker,
+ ReferenceString,
+ Link,
+ Image,
+ Label,
+ LabelText,
/// Line ending.
///
/// ## Info
@@ -1243,6 +1270,9 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult;
/// In certain cases, it can also yield back up parsed codes that were passed down.
pub type StateFnResult = (State, Option<Vec<Code>>);
+/// Callback that is called once with the tokenizer and returns the list of events that replaces the tokenizer’s current events.
+pub type Resolver = dyn FnOnce(&mut Tokenizer) -> Vec<Event>;
+
/// The result of a state.
pub enum State {
/// There is a future state: a boxed [`StateFn`][] to pass the next code to.
@@ -1253,6 +1283,30 @@ pub enum State {
Nok,
}
+/// Bookkeeping for one label start, as stored in the tokenizer’s `label_start_stack` and `label_start_list_loose`.
+#[derive(Debug)]
+pub struct LabelStart {
+ /// Where the label start is (NOTE(review): presumably a pair of indices into the events — confirm).
+ pub start: (usize, usize),
+ /// A boolean used internally to figure out if a label start link can’t be
+ /// used (because links in links are incorrect).
+ pub inactive: bool,
+ /// A boolean used internally to figure out if a label is balanced: they’re
+ /// not media, it’s just balanced braces.
+ pub balanced: bool,
+}
+
+/// One entry of the tokenizer’s `media_list` (NOTE(review): presumably a matched link or image — confirm).
+#[derive(Debug)]
+pub struct Media {
+ /// Where the media starts (NOTE(review): presumably a pair of indices into the events — confirm).
+ pub start: (usize, usize),
+ /// Where the media ends (NOTE(review): presumably the same kind of pair as `start` — confirm).
+ pub end: (usize, usize),
+ /// Identifier of the media (NOTE(review): presumably the label or reference text it was matched by — confirm).
+ pub id: String,
+}
+
/// The internal state of a tokenizer, not to be confused with states from the
/// state machine, this instead is all the information about where we currently
/// are and what’s going on.
@@ -1272,9 +1326,10 @@ struct InternalState {
point: Point,
}
+// #[derive(Debug)]
+
/// A tokenizer itself.
-#[derive(Debug)]
-pub struct Tokenizer {
+pub struct Tokenizer<'a> {
column_start: HashMap<usize, usize>,
/// Track whether a character is expected to be consumed, and whether it’s
/// actually consumed
@@ -1295,11 +1350,22 @@ pub struct Tokenizer {
index: usize,
/// Current relative and absolute place in the file.
point: Point,
+ /// To do.
+ pub parse_state: &'a ParseState,
+ /// To do.
+ pub label_start_stack: Vec<LabelStart>,
+ /// To do.
+ pub label_start_list_loose: Vec<LabelStart>,
+ /// To do.
+ pub media_list: Vec<Media>,
+ /// To do.
+ resolvers: Vec<Box<Resolver>>,
+ resolver_ids: Vec<String>,
}
-impl Tokenizer {
+impl<'a> Tokenizer<'a> {
/// Create a new tokenizer.
- pub fn new(point: Point, index: usize) -> Tokenizer {
+ pub fn new(point: Point, index: usize, parse_state: &'a ParseState) -> Tokenizer {
Tokenizer {
previous: Code::None,
current: Code::None,
@@ -1309,6 +1375,20 @@ impl Tokenizer {
point,
stack: vec![],
events: vec![],
+ parse_state,
+ label_start_stack: vec![],
+ label_start_list_loose: vec![],
+ media_list: vec![],
+ resolvers: vec![],
+ resolver_ids: vec![],
+ }
+ }
+
+ /// Register `resolver` under `id`; if a resolver with the same `id` was registered before, this one is dropped instead.
+ pub fn register_resolver(&mut self, id: String, resolver: Box<Resolver>) {
+ if !self.resolver_ids.contains(&id) {
+ self.resolver_ids.push(id);
+ self.resolvers.push(resolver);
}
}
@@ -1582,7 +1662,8 @@ impl Tokenizer {
/// This is set up to support repeatedly calling `feed`, and thus streaming
/// markdown into the state machine, and normally pauses after feeding.
/// When `done: true` is passed, the EOF is fed.
- pub fn feed(
+ // To do: call this `feed_impl`, and rename `push` to `feed`?
+ fn feed(
&mut self,
codes: &[Code],
start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
@@ -1643,6 +1724,26 @@ impl Tokenizer {
check_statefn_result((state, None))
}
+
+ /// Pass `codes`, `start`, and `drain` on to `feed` and return its result; when `drain` is set, first run all registered resolvers.
+ // To do: set a `drained` to prevent passing after draining?
+ pub fn push(
+ &mut self,
+ codes: &[Code],
+ start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ drain: bool,
+ ) -> StateFnResult {
+ let result = self.feed(codes, start, drain);
+
+ if drain {
+ while !self.resolvers.is_empty() { // Loops until the list is empty, so resolvers added meanwhile also run.
+ let resolver = self.resolvers.remove(0); // Take from the front: resolvers run in registration order.
+ self.events = resolver(self); // Each resolver replaces the whole list of events.
+ }
+ }
+
+ result
+ }
}
/// Internal utility to wrap states to also capture codes.