Add support for sharing identifiers, references before definitions

author: Titus Wormer <tituswormer@gmail.com> 2022-06-29 10:26:39 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-06-29 10:26:39 +0200
commit: 7721f210c16e19b1c2af90f69130386b89bb5104 (patch)
tree: c47ff3d9c974ccd0c81a2c5b8ccbce9f7635975d
parent: 7bb1008f508f61b51dd80086a91ada347be36c68 (diff)
download: markdown-rs-7721f210c16e19b1c2af90f69130386b89bb5104.tar.gz
markdown-rs-7721f210c16e19b1c2af90f69130386b89bb5104.tar.bz2
markdown-rs-7721f210c16e19b1c2af90f69130386b89bb5104.zip
6 files changed, 62 insertions, 46 deletions
diff --git a/readme.md b/readme.md
index 155e71c..1626a6a 100644
--- a/readme.md
+++ b/readme.md
@@ -180,6 +180,8 @@ cargo doc --document-private-items
 - [ ] (1) Remove todos in `span.rs` if not needed
 - [ ] (1) Get markers from constructs (`string`, `text`)
 - [ ] (3) Clean compiler
+- [ ] (3) Read through rust docs to figure out what useful functions there are,
+      and fix stuff I’m doing manually now
 - [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept,
       how to integrate with streams or so?
 - [ ] (1) Go through clippy rules, and such, to add strict code styles
diff --git a/src/compiler.rs b/src/compiler.rs
index bb7aedc..6f4d1a6 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -91,6 +91,14 @@ struct Media {
     title: Option<String>,
 }
 
+/// Identifier, destination, and title of one definition; each is `None` until set.
+#[derive(Debug, Clone, PartialEq)]
+struct DefinitionInfo {
+    id: Option<String>,
+    destination: Option<String>,
+    title: Option<String>,
+}
+
 /// Configuration (optional).
 #[derive(Default, Debug)]
 pub struct Options {
@@ -226,17 +234,39 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
     let mut media_stack: Vec<Media> = vec![];
 
     // let mut slurp_all_line_endings = false;
+    let mut definition: Option<DefinitionInfo> = None;
+
+    // To do: actually do a compile pass, so that `buffer`, `resume`, etc can be used.
     while index < events.len() {
         let event = &events[index];
 
-        if event.event_type == EventType::Exit
+        // Find the used line ending style.
+        if line_ending_inferred.is_none()
+            && event.event_type == EventType::Exit
             && (event.token_type == TokenType::BlankLineEnding
                 || event.token_type == TokenType::CodeTextLineEnding
                 || event.token_type == TokenType::LineEnding)
         {
             let codes = codes_from_span(codes, &from_exit_event(events, index));
             line_ending_inferred = Some(LineEnding::from_code(*codes.first().unwrap()));
-            break;
+        }
+
+        if event.event_type == EventType::Enter {
+            if event.token_type == TokenType::Definition {
+                definition = Some(DefinitionInfo {
+                    id: None,
+                    destination: None,
+                    title: None,
+                });
+            }
+        } else if event.token_type == TokenType::Definition {
+            definition = None;
+        } else if event.token_type == TokenType::DefinitionLabelString
+            || event.token_type == TokenType::DefinitionDestinationString
+            || event.token_type == TokenType::DefinitionTitleString
+        {
+            let slice = serialize(codes, &from_exit_event(events, index), false);
+            println!("set: {:?} {:?}", slice, definition);
         }
 
         index += 1;
@@ -250,7 +280,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
         LineEnding::LineFeed
     };
 
-    index = 0;
+    let mut index = 0;
 
     while index < events.len() {
         let event = &events[index];
@@ -483,7 +513,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
                     let label = media.label.unwrap();
                     let buf = buf_tail_mut(buffers);
                     // To do: get from definition.
-                    let destination = media.destination.unwrap();
+                    let destination = media.destination.unwrap_or_else(|| "".to_string());
                     let title = if let Some(title) = media.title {
                         format!(" title=\"{}\"", title)
                     } else {
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 581e5e9..f43a740 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -413,13 +413,8 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// [a]: z
 /// ```
 fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
-    // let label_start = tokenizer
-    //     .label_start_stack
-    //     .get_mut(info.label_start_index)
-    //     .unwrap();
-    // To do: figure out if defined or not.
-    let defined = false;
-    println!("to do: is `{:?}` defined?", info);
+    let defined = tokenizer.parse_state.definitions.contains(&info.media.id);
+
     match code {
         // Resource (`[asd](fgh)`)?
         Code::Char('(') => tokenizer.attempt(resource, move |is_ok| {
@@ -487,10 +482,6 @@ fn reference_not_full(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
 /// [a]: z
 /// ```
 fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
-    println!(
-        "ok res, ref full, ref, collapsed, or ref shortcut: {:?}",
-        info.media
-    );
     // Remove this one and everything after it.
     let mut left: Vec<LabelStart> = tokenizer
         .label_start_stack
@@ -725,8 +716,8 @@ fn full_reference_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
         },
         false,
     ));
-    println!("to do: is `{:?}` defined?", id);
-    let defined = false;
+    let defined = tokenizer.parse_state.definitions.contains(&id);
+    // To do: set `id` on the media somehow?
 
     if defined {
         (State::Ok, Some(vec![code]))
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 546712f..0d3ede0 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -33,12 +33,13 @@ use crate::util::{
     normalize_identifier::normalize_identifier,
     span::{from_exit_event, serialize},
 };
+use std::collections::HashSet;
 
 /// Turn `codes` as the flow content type into events.
-pub fn flow(parse_state: &ParseState, point: Point, index: usize) -> Vec<Event> {
+pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> {
     let mut tokenizer = Tokenizer::new(point, index, parse_state);
-
     tokenizer.push(&parse_state.codes, Box::new(start), true);
+    let mut next_definitions: HashSet<String> = HashSet::new();
 
     let mut index = 0;
 
@@ -48,15 +49,14 @@ pub fn flow(parse_state: &ParseState, point: Point, index: usize) -> Vec<Event>
         if event.event_type == EventType::Exit
             && event.token_type == TokenType::DefinitionLabelString
         {
-            let id = normalize_identifier(
+            next_definitions.insert(normalize_identifier(
                 serialize(
                     &parse_state.codes,
                     &from_exit_event(&tokenizer.events, index),
                     false,
                 )
                 .as_str(),
-            );
-            println!("to do: use definition identifier {:?}", id);
+            ));
         }
 
         index += 1;
@@ -64,6 +64,8 @@ pub fn flow(parse_state: &ParseState, point: Point, index: usize) -> Vec<Event>
 
     let mut result = (tokenizer.events, false);
 
+    parse_state.definitions = next_definitions;
+
     while !result.1 {
         result = subtokenize(result.0, parse_state);
     }
diff --git a/src/parser.rs b/src/parser.rs
index 32b7f36..f11f0d1 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -4,24 +4,32 @@
 use crate::content::flow::flow;
 use crate::tokenizer::{as_codes, Code, Event, Point};
 
+/// To do: could we do without `HashSet`, so we don’t need `std`?
+use std::collections::HashSet;
+
+/// Information needed, in all content types, when parsing markdown.
+///
+/// Importantly, this contains a set of known definitions.
+/// It also references the input value as [`Code`][]s.
+#[derive(Debug)]
 pub struct ParseState {
-    /// To do.
+    /// List of codes.
     pub codes: Vec<Code>,
-    /// To do.
-    pub definitions: Vec<String>,
+    /// Set of defined identifiers.
+    pub definitions: HashSet<String>,
 }
 
 /// Turn a string of markdown into events.
 ///
 /// Passes the codes back so the compiler can access the source.
 pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) {
-    let parse_state = ParseState {
+    let mut parse_state = ParseState {
         codes: as_codes(value),
-        definitions: vec![],
+        definitions: HashSet::new(),
     };
 
     let events = flow(
-        &parse_state,
+        &mut parse_state,
         Point {
             line: 1,
             column: 1,
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index 8136306..db9c887 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -71,31 +71,14 @@ impl EditMap {
         assert!(!self.consumed, "cannot consume after consuming");
         self.consumed = true;
 
-        let mut index = 0;
-
-        while index < events.len() {
-            let event = &events[index];
-            println!(
-                "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
-                index,
-                event.event_type,
-                event.token_type,
-                event.content_type,
-                event.previous,
-                event.next
-            );
-            index += 1;
-        }
-
         indices.sort_unstable();
 
         let mut jumps: Vec<(usize, isize)> = vec![];
         let mut index_into_indices = 0;
-        let mut shift: isize = 0;
+        let mut shift = 0;
         while index_into_indices < indices.len() {
             let index = *indices[index_into_indices];
             let edit = self.map.get(&index).unwrap();
-            println!("?? {:?} {:?} {:?}", shift, edit.1.len(), edit.0);
 
             #[allow(clippy::pedantic)]
             let next = shift + (edit.1.len() as isize) - (edit.0 as isize);
author	Titus Wormer <tituswormer@gmail.com>	2022-06-29 10:26:39 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-06-29 10:26:39 +0200
commit	7721f210c16e19b1c2af90f69130386b89bb5104 (patch)
tree	c47ff3d9c974ccd0c81a2c5b8ccbce9f7635975d
parent	7bb1008f508f61b51dd80086a91ada347be36c68 (diff)
download	markdown-rs-7721f210c16e19b1c2af90f69130386b89bb5104.tar.gz markdown-rs-7721f210c16e19b1c2af90f69130386b89bb5104.tar.bz2 markdown-rs-7721f210c16e19b1c2af90f69130386b89bb5104.zip