author      Titus Wormer <tituswormer@gmail.com>    2022-08-11 14:53:42 +0200
committer   Titus Wormer <tituswormer@gmail.com>    2022-08-11 14:53:42 +0200
commit      d87dc75500a2e73d9521135054b42c18e6eae987 (patch)
tree        4bc3f4a72894db3ec5ed4069841c6be5be69713d /src/tokenizer.rs
parent      6eb2f644057f371841fe25330a57ee185f91c7af (diff)
Refactor to move some code to `event.rs`
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r--  src/tokenizer.rs  134
1 file changed, 38 insertions(+), 96 deletions(-)
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index baad6ed..b48351d 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,6 +1,6 @@
//! The tokenizer glues states from the state machine together.
//!
-//! It facilitates everything needed to turn codes into tokens and events with
+//! It facilitates everything needed to turn codes into tokens with
//! a state machine.
//! It also enables logic needed for parsing markdown, such as an [`attempt`][]
//! to parse something, which can succeed or, when unsuccessful, revert the
@@ -12,22 +12,11 @@
//! [`check`]: Tokenizer::check
use crate::constant::TAB_SIZE;
+use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS};
use crate::parser::ParseState;
-use crate::state::{call, Name, State};
-use crate::token::{Token, VOID_TOKENS};
+use crate::state::{call, Name as StateName, State};
use crate::util::edit_map::EditMap;
-/// Embedded content type.
-#[derive(Debug, Clone, PartialEq)]
-pub enum ContentType {
- /// Represents [flow content][crate::content::flow].
- Flow,
- /// Represents [string content][crate::content::string].
- String,
- /// Represents [text content][crate::content::text].
- Text,
-}
-
/// How to handle a byte.
#[derive(Debug, PartialEq)]
pub enum ByteAction {
@@ -41,53 +30,6 @@ pub enum ByteAction {
Ignore,
}
-/// A location in the document (`line`/`column`/`offset`).
-///
-/// The interface for the location in the document comes from unist `Point`:
-/// <https://github.com/syntax-tree/unist#point>.
-#[derive(Debug, Clone)]
-pub struct Point {
- /// 1-indexed line number.
- pub line: usize,
- /// 1-indexed column number.
- /// This increases up to a tab stop for tabs.
- /// Some editors count tabs as 1 character, so this position is not the
- /// same as in editors.
- pub column: usize,
- /// 0-indexed position in the document.
- ///
- /// Also an `index` into `bytes`.
- pub index: usize,
- /// Virtual step on the same `index`.
- pub vs: usize,
-}
-
-/// Possible event types.
-#[derive(Debug, PartialEq, Clone)]
-pub enum EventType {
- /// The start of something.
- Enter,
- /// The end of something.
- Exit,
-}
-
-/// A link to another event.
-#[derive(Debug, Clone)]
-pub struct Link {
- pub previous: Option<usize>,
- pub next: Option<usize>,
- pub content_type: ContentType,
-}
-
-/// Something semantic happening somewhere.
-#[derive(Debug, Clone)]
-pub struct Event {
- pub event_type: EventType,
- pub token_type: Token,
- pub point: Point,
- pub link: Option<Link>,
-}
-
/// Callback that can be registered and is called when the tokenizer is done.
///
/// Resolvers are supposed to change the list of events, because parsing is
@@ -205,15 +147,15 @@ pub struct TokenizeState<'a> {
pub document_paragraph_before: bool,
// Couple of very frequent settings for parsing whitespace.
- pub space_or_tab_eol_content_type: Option<ContentType>,
+ pub space_or_tab_eol_content_type: Option<Content>,
pub space_or_tab_eol_connect: bool,
pub space_or_tab_eol_ok: bool,
pub space_or_tab_connect: bool,
- pub space_or_tab_content_type: Option<ContentType>,
+ pub space_or_tab_content_type: Option<Content>,
pub space_or_tab_min: usize,
pub space_or_tab_max: usize,
pub space_or_tab_size: usize,
- pub space_or_tab_token: Token,
+ pub space_or_tab_token: Name,
// Couple of media related fields.
/// Stack of label (start) that could form images and links.
@@ -250,15 +192,15 @@ pub struct TokenizeState<'a> {
/// Index.
pub end: usize,
/// Slot for a token type.
- pub token_1: Token,
+ pub token_1: Name,
/// Slot for a token type.
- pub token_2: Token,
+ pub token_2: Name,
/// Slot for a token type.
- pub token_3: Token,
+ pub token_3: Name,
/// Slot for a token type.
- pub token_4: Token,
+ pub token_4: Name,
/// Slot for a token type.
- pub token_5: Token,
+ pub token_5: Name,
}
/// A tokenizer itself.
@@ -290,7 +232,7 @@ pub struct Tokenizer<'a> {
/// Hierarchy of semantic labels.
///
/// Tracked to make sure everything’s valid.
- pub stack: Vec<Token>,
+ pub stack: Vec<Name>,
/// Edit map, to batch changes.
pub map: EditMap,
/// List of attached resolvers, which will be called when done feeding,
@@ -363,12 +305,12 @@ impl<'a> Tokenizer<'a> {
space_or_tab_min: 0,
space_or_tab_max: 0,
space_or_tab_size: 0,
- space_or_tab_token: Token::SpaceOrTab,
- token_1: Token::Data,
- token_2: Token::Data,
- token_3: Token::Data,
- token_4: Token::Data,
- token_5: Token::Data,
+ space_or_tab_token: Name::SpaceOrTab,
+ token_1: Name::Data,
+ token_2: Name::Data,
+ token_3: Name::Data,
+ token_4: Name::Data,
+ token_5: Name::Data,
},
map: EditMap::new(),
interrupt: false,
@@ -491,13 +433,13 @@ impl<'a> Tokenizer<'a> {
}
/// Mark the start of a semantic label.
- pub fn enter(&mut self, token_type: Token) {
- self.enter_with_link(token_type, None);
+ pub fn enter(&mut self, name: Name) {
+ self.enter_with_link(name, None);
}
- pub fn enter_with_content(&mut self, token_type: Token, content_type_opt: Option<ContentType>) {
+ pub fn enter_with_content(&mut self, name: Name, content_type_opt: Option<Content>) {
self.enter_with_link(
- token_type,
+ name,
content_type_opt.map(|content_type| Link {
content_type,
previous: None,
@@ -506,26 +448,26 @@ impl<'a> Tokenizer<'a> {
);
}
- pub fn enter_with_link(&mut self, token_type: Token, link: Option<Link>) {
+ pub fn enter_with_link(&mut self, name: Name, link: Option<Link>) {
let mut point = self.point.clone();
move_point_back(self, &mut point);
- log::debug!("enter: `{:?}`", token_type);
+ log::debug!("enter: `{:?}`", name);
self.events.push(Event {
- event_type: EventType::Enter,
- token_type: token_type.clone(),
+ kind: Kind::Enter,
+ name: name.clone(),
point,
link,
});
- self.stack.push(token_type);
+ self.stack.push(name);
}
/// Mark the end of a semantic label.
- pub fn exit(&mut self, token_type: Token) {
+ pub fn exit(&mut self, name: Name) {
let current_token = self.stack.pop().expect("cannot close w/o open tokens");
debug_assert_eq!(
- current_token, token_type,
+ current_token, name,
"expected exit token to match current token"
);
@@ -533,18 +475,18 @@ impl<'a> Tokenizer<'a> {
let mut point = self.point.clone();
debug_assert!(
- current_token != previous.token_type
+ current_token != previous.name
|| previous.point.index != point.index
|| previous.point.vs != point.vs,
"expected non-empty token"
);
- if VOID_TOKENS.iter().any(|d| d == &token_type) {
+ if VOID_EVENTS.iter().any(|d| d == &name) {
debug_assert!(
- current_token == previous.token_type,
+ current_token == previous.name,
"expected token to be void (`{:?}`), instead of including `{:?}`",
current_token,
- previous.token_type
+ previous.name
);
}
@@ -556,10 +498,10 @@ impl<'a> Tokenizer<'a> {
move_point_back(self, &mut point);
}
- log::debug!("exit: `{:?}`", token_type);
+ log::debug!("exit: `{:?}`", name);
self.events.push(Event {
- event_type: EventType::Exit,
- token_type,
+ kind: Kind::Exit,
+ name,
point,
link: None,
});
@@ -595,7 +537,7 @@ impl<'a> Tokenizer<'a> {
/// Parse with `name` and its future states, to see if that results in
/// [`State::Ok`][] or [`State::Nok`][], then revert in both cases.
- pub fn check(&mut self, name: Name, ok: State, nok: State) -> State {
+ pub fn check(&mut self, name: StateName, ok: State, nok: State) -> State {
// Always capture (and restore) when checking.
// No need to capture (and restore) when `nok` is `State::Nok`, because the
// parent attempt will do it.
@@ -614,7 +556,7 @@ impl<'a> Tokenizer<'a> {
/// Parse with `name` and its future states, to see if that results in
/// [`State::Ok`][] or [`State::Nok`][], revert in the case of
/// `State::Nok`.
- pub fn attempt(&mut self, name: Name, ok: State, nok: State) -> State {
+ pub fn attempt(&mut self, name: StateName, ok: State, nok: State) -> State {
// Always capture (and restore) when checking.
// No need to capture (and restore) when `nok` is `State::Nok`, because the
// parent attempt will do it.
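
The import added at the top of this diff pulls `Content`, `Event`, `Kind`, `Link`, `Name`, `Point`, and `VOID_EVENTS` from the new `crate::event` module, while the matching definitions are removed from `tokenizer.rs`. Read together, the move also renames: `ContentType` becomes `Content`, `EventType` becomes `Kind`, `Token` becomes `Name`, and `VOID_TOKENS` becomes `VOID_EVENTS`. Below is a minimal sketch of what the relocated definitions in `event.rs` plausibly look like, reconstructed from the removed code above; the exact doc comments and derives are assumptions, and the `Name` enum (the former `Token`, listing token names such as `Data` and `SpaceOrTab`) is omitted because it does not appear in this diff.

/// Embedded content type (the former `ContentType`).
#[derive(Debug, Clone, PartialEq)]
pub enum Content {
    /// Represents flow content.
    Flow,
    /// Represents string content.
    String,
    /// Represents text content.
    Text,
}

/// Whether an event marks the start or the end of something
/// (the former `EventType`).
#[derive(Debug, Clone, PartialEq)]
pub enum Kind {
    /// The start of something.
    Enter,
    /// The end of something.
    Exit,
}

/// A location in the document (`line`/`column`/`offset`), unchanged
/// apart from the move.
#[derive(Debug, Clone)]
pub struct Point {
    /// 1-indexed line number.
    pub line: usize,
    /// 1-indexed column number; increases up to a tab stop for tabs.
    pub column: usize,
    /// 0-indexed position in the document; also an `index` into `bytes`.
    pub index: usize,
    /// Virtual step on the same `index`.
    pub vs: usize,
}

/// A link to another event.
#[derive(Debug, Clone)]
pub struct Link {
    pub previous: Option<usize>,
    pub next: Option<usize>,
    pub content_type: Content,
}

/// Something semantic happening somewhere, with the fields renamed from
/// `event_type`/`token_type` to `kind`/`name` as used throughout this diff.
#[derive(Debug, Clone)]
pub struct Event {
    pub kind: Kind,
    pub name: Name,
    pub point: Point,
    pub link: Option<Link>,
}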
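
The doc comments on `check` and `attempt` spell out the lookahead contract: both run `name` and its future states to completion, but `check` reverts the tokenizer whichever of `State::Ok` or `State::Nok` comes out, whereas `attempt` keeps the parsed events on `State::Ok` and reverts only on `State::Nok`. A hypothetical caller is sketched below; the `State::Next` variant and the `StateName::BlankLineStart`, `StateName::FlowAfter`, and `StateName::FlowBefore` names follow this codebase's conventions but do not appear in this diff, so treat them as assumptions.

/// Hypothetical state function showing the shape of a `check` call.
fn flow_start(tokenizer: &mut Tokenizer) -> State {
    // Peek at whether a blank line follows without keeping its events:
    // `check` captures the tokenizer state, runs the inner state machine,
    // restores the capture in both outcomes, and returns only the branch
    // matching the result.
    tokenizer.check(
        StateName::BlankLineStart,
        State::Next(StateName::FlowAfter),
        State::Next(StateName::FlowBefore),
    )
}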