Add code (text)

author: Titus Wormer <tituswormer@gmail.com> 2022-06-15 18:17:01 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-06-15 18:17:01 +0200
commit: acc35758778bfda5cb01951533868eb8baa2e2d2 (patch)
tree: 82ded1c1f88d04a1c37e40822ad7e5a6bba8a717
parent: 75dcb48f78a8a798fde525d2d39e20cffec48e50 (diff)
download: markdown-rs-acc35758778bfda5cb01951533868eb8baa2e2d2.tar.gz
markdown-rs-acc35758778bfda5cb01951533868eb8baa2e2d2.tar.bz2
markdown-rs-acc35758778bfda5cb01951533868eb8baa2e2d2.zip
13 files changed, 507 insertions, 93 deletions
diff --git a/readme.md b/readme.md
index b7fcbfe..07b224e 100644
--- a/readme.md
+++ b/readme.md
@@ -109,7 +109,7 @@ cargo doc --document-private-items
 - [x] character reference
 - [x] code (fenced)
 - [x] code (indented)
-- [ ] (1) code (text)
+- [x] (1) code (text)
 - [ ] (3) content
 - [ ] (3) definition
 - [ ] (1) hard break escape
@@ -146,7 +146,7 @@ cargo doc --document-private-items
   - [x] autolink
   - [x] character escape
   - [x] character reference
-  - [ ] code (text)
+  - [x] code (text)
   - [ ] hard break escape
   - [x] html (text)
   - [ ] label end
diff --git a/src/compiler.rs b/src/compiler.rs
index 6f0215c..d3d935b 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -96,8 +96,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
 
     // let mut slurp_all_line_endings = false;
 
-    println!("events: {:#?}", events);
-
     while index < events.len() {
         let event = &events[index];
         let token_type = &event.token_type;
@@ -133,6 +131,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                         ignore_encode = true;
                     }
                 }
+                TokenType::CodeText => {
+                    buf_tail_mut(buffers).push("<code>".to_string());
+                    buffer(buffers);
+                }
                 TokenType::Content
                 | TokenType::AtxHeading
                 | TokenType::AtxHeadingSequence
@@ -152,6 +154,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::CodeFencedFence
                 | TokenType::CodeFencedFenceSequence
                 | TokenType::CodeFencedFenceWhitespace
+                | TokenType::CodeTextSequence
+                | TokenType::CodeTextData
+                | TokenType::CodeTextLineEnding
                 | TokenType::Data
                 | TokenType::CharacterEscape
                 | TokenType::CharacterEscapeMarker
@@ -181,6 +186,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::Whitespace
                 | TokenType::CodeFencedFenceSequence
                 | TokenType::CodeFencedFenceWhitespace
+                | TokenType::CodeTextSequence
                 | TokenType::CharacterEscape
                 | TokenType::CharacterEscapeMarker
                 | TokenType::CharacterReference
@@ -264,6 +270,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                         false,
                     )));
                 }
+
                 // `AtxHeadingWhitespace` is ignored after the opening sequence,
                 // before the closing sequence, and after the closing sequence.
                 // But it is used around intermediate sequences.
@@ -290,7 +297,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                     }
                 }
                 TokenType::AtxHeadingText => {
-                    println!("text: {:?}", atx_heading_buffer);
                     if let Some(ref buf) = atx_heading_buffer {
                         if !buf.is_empty() {
                             buf_tail_mut(buffers).push(encode(buf));
@@ -301,7 +307,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                     }
 
                     let slice = encode(&serialize(codes, &from_exit_event(events, index), false));
-                    println!("slice: {:?}", slice);
                     buf_tail_mut(buffers).push(slice);
                 }
                 TokenType::AtxHeading => {
@@ -340,8 +345,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                     // } else
                     if slurp_one_line_ending {
                         slurp_one_line_ending = false;
-                    // } else if code_text_inside {
-                    //     buf_tail_mut(buffers).push(" ".to_string());
                     } else {
                         buf_tail_mut(buffers).push(encode(&serialize(
                             codes,
@@ -378,8 +381,33 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
 
                     character_reference_kind = None;
                 }
+                TokenType::CodeText => {
+                    let result = resume(buffers);
+                    let mut chars = result.chars();
+                    let mut trim = false;
+
+                    if Some(' ') == chars.next() && Some(' ') == chars.next_back() {
+                        let mut next = chars.next();
+                        while next != None && !trim {
+                            if Some(' ') != next {
+                                trim = true;
+                            }
+                            next = chars.next();
+                        }
+                    }
+
+                    buf_tail_mut(buffers).push(if trim {
+                        result[1..(result.len() - 1)].to_string()
+                    } else {
+                        result
+                    });
+                    buf_tail_mut(buffers).push("</code>".to_string());
+                }
+                TokenType::CodeTextLineEnding => {
+                    buf_tail_mut(buffers).push(" ".to_string());
+                }
                 // This branch below currently acts as the resulting `data` tokens.
-                TokenType::Data | TokenType::CharacterEscapeValue => {
+                TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
                     // last_was_tag = false;
                     buf_tail_mut(buffers).push(encode(&serialize(
                         codes,
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index c852e8d..12c8bd6 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -66,10 +66,10 @@
 //! The `info` and `meta` parts are interpreted as the [string][] content type.
 //! That means that character escapes and character reference are allowed.
 //!
-//! In markdown, it is also possible to use code (text) in the [text][] content
-//! type.
+//! In markdown, it is also possible to use [code (text)][code_text] in the
+//! [text][] content type.
 //! It is also possible to create code with the
-//! [code (indented)][code-indented] construct.
+//! [code (indented)][code_indented] construct.
 //! That construct is less explicit, different from code (text), and has no
 //! support for specifying the programming language, so it is recommended to
 //! use code (fenced) instead of code (indented).
@@ -82,11 +82,10 @@
 //! [flow]: crate::content::flow
 //! [string]: crate::content::string
 //! [text]: crate::content::text
-//! [code-indented]: crate::construct::code_indented
+//! [code_indented]: crate::construct::code_indented
+//! [code_text]: crate::construct::code_text
 //! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
-//!
-//! <!-- To do: link `code_text` -->
 
 use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
 use crate::construct::partial_whitespace::start as whitespace;
@@ -251,14 +250,12 @@ fn info_inside(
 ) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            println!("to do: subtokenize: {:?}", codes);
             tokenizer.exit(TokenType::ChunkString);
             tokenizer.exit(TokenType::CodeFencedFenceInfo);
             tokenizer.exit(TokenType::CodeFencedFence);
             at_break(tokenizer, info, code)
         }
         Code::VirtualSpace | Code::Char('\t' | ' ') => {
-            println!("to do: subtokenize: {:?}", codes);
             tokenizer.exit(TokenType::ChunkString);
             tokenizer.exit(TokenType::CodeFencedFenceInfo);
             tokenizer.attempt(
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 936f174..55b8901 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -18,9 +18,9 @@
 //! See [*§ 4.4.3 The `pre` element*][html-pre] and the [*§ 4.5.15 The `code`
 //! element*][html-code] in the HTML spec for more info.
 //!
-//! In markdown, it is also possible to use code (text) in the text content
-//! type.
-//! It is also possible to create code with the [code (fenced)][code-fenced]
+//! In markdown, it is also possible to use [code (text)][code_text] in the
+//! [text][] content type.
+//! It is also possible to create code with the [code (fenced)][code_fenced]
 //! construct.
 //! That construct is more explicit, more similar to code (text), and has
 //! support for specifying the programming language that the code is in, so it
@@ -32,11 +32,11 @@
 //! *   [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#indented-code-blocks)
 //!
 //! [flow]: crate::content::flow
-//! [code-fenced]: crate::construct::code_fenced
+//! [text]: crate::content::text
+//! [code_text]: crate::construct::code_text
+//! [code_fenced]: crate::construct::code_fenced
 //! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
-//!
-//! <!-- To do: link `code_text` -->
 
 use crate::constant::TAB_SIZE;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
new file mode 100644
index 0000000..3c01070
--- /dev/null
+++ b/src/construct/code_text.rs
@@ -0,0 +1,217 @@
+//! Code (text) is a construct that occurs in the [text][] content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: the number of markers in the closing sequence must be equal
+//! ; to the number of markers in the opening sequence.
+//! code_text ::= sequence 1*code sequence
+//!
+//! sequence ::= 1*'`'
+//! ```
+//!
+//! The above grammar shows that it is not possible to create empty code.
+//! It is possible to include grave accents (ticks) in code, by wrapping it
+//! in bigger or smaller sequences:
+//!
+//! ```markdown
+//! Include more: `a``b` or include less: ``a`b``.
+//! ```
+//!
+//! When turning markdown into HTML, each line ending is turned into a space.
+//!
+//! It is also possible to include just one grave accent (tick):
+//!
+//! ```markdown
+//! Include just one: `` ` ``.
+//! ```
+//!
+//! Sequences are “greedy”, in that they cannot be preceded or succeeded by
+//! more grave accents (ticks).
+//! To illustrate:
+//!
+//! ```markdown
+//! Not code: ``x`.
+//!
+//! Not code: `x``.
+//!
+//! Escapes work, this is code: \``x`.
+//!
+//! Escapes work, this is code: `x`\`.
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p>Not code: ``x`.</p>
+//! <p>Not code: `x``.</p>
+//! <p>Escapes work, this is code: `<code>x</code>.</p>
+//! <p>Escapes work, this is code: <code>x</code>`.</p>
+//! ```
+//!
+//! Note: when turning markdown into HTML, the first and last space, if both
+//! exist and there is also a non-space in the code, are removed.
+//! Line endings, at that stage, are considered as spaces.
+//!
+//! Code (text) relates to the `<code>` element in HTML.
+//! See [*§ 4.5.15 The `code` element*][html-code] in the HTML spec for more
+//! info.
+//!
+//! In markdown, it is possible to create code with the
+//! [code (fenced)][code_fenced] or [code (indented)][code_indented] constructs
+//! in the [flow][] content type.
+//! Compared to code (indented), fenced code is more explicit and more similar
+//! to code (text), and it has support for specifying the programming language
+//! that the code is in, so it is recommended to use that instead of indented
+//! code.
+//!
+//! ## References
+//!
+//! *   [`code-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-text.js)
+//! *   [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans)
+//!
+//! [flow]: crate::content::flow
+//! [text]: crate::content::text
+//! [code_indented]: crate::construct::code_indented
+//! [code_fenced]: crate::construct::code_fenced
+//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of code (text).
+///
+/// ```markdown
+/// |`a`
+///
+/// |\``a`
+///
+/// |``a`
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let len = tokenizer.events.len();
+
+    match code {
+        Code::Char('`')
+            if tokenizer.previous != Code::Char('`')
+                || (len > 0
+                    && tokenizer.events[len - 1].token_type == TokenType::CharacterEscape) =>
+        {
+            tokenizer.enter(TokenType::CodeText);
+            tokenizer.enter(TokenType::CodeTextSequence);
+            sequence_open(tokenizer, code, 0)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In the opening sequence.
+///
+/// ```markdown
+/// `|`a``
+/// ```
+pub fn sequence_open(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+    if let Code::Char('`') = code {
+        tokenizer.consume(code);
+        (
+            State::Fn(Box::new(move |tokenizer, code| {
+                sequence_open(tokenizer, code, size + 1)
+            })),
+            None,
+        )
+    } else {
+        tokenizer.exit(TokenType::CodeTextSequence);
+        between(tokenizer, code, size)
+    }
+}
+
+/// Between pieces: after the opening sequence, a line ending, or data.
+///
+/// ```markdown
+/// `|a`
+/// `a|`
+/// ```
+pub fn between(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResult {
+    match code {
+        Code::None => (State::Nok, None),
+        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+            tokenizer.enter(TokenType::CodeTextLineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::CodeTextLineEnding);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    between(tokenizer, code, size_open)
+                })),
+                None,
+            )
+        }
+        Code::Char('`') => {
+            tokenizer.enter(TokenType::CodeTextSequence);
+            sequence_close(tokenizer, code, size_open, 0)
+        }
+        _ => {
+            tokenizer.enter(TokenType::CodeTextData);
+            data(tokenizer, code, size_open)
+        }
+    }
+}
+
+/// In data.
+///
+/// ```markdown
+/// `a|b`
+/// ```
+pub fn data(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '`') => {
+            tokenizer.exit(TokenType::CodeTextData);
+            between(tokenizer, code, size_open)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    data(tokenizer, code, size_open)
+                })),
+                None,
+            )
+        }
+    }
+}
+
+/// In the closing sequence.
+///
+/// ```markdown
+/// ``a`|`
+/// ```
+pub fn sequence_close(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    size_open: usize,
+    size: usize,
+) -> StateFnResult {
+    match code {
+        Code::Char('`') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    sequence_close(tokenizer, code, size_open, size + 1)
+                })),
+                None,
+            )
+        }
+        _ if size_open == size => {
+            tokenizer.exit(TokenType::CodeTextSequence);
+            tokenizer.exit(TokenType::CodeText);
+            (State::Ok, Some(vec![code]))
+        }
+        _ => {
+            let tail_index = tokenizer.events.len();
+            let head_index = tokenizer.events.len() - 1;
+            tokenizer.exit(TokenType::CodeTextSequence);
+            // Change the token type.
+            tokenizer.events[head_index].token_type = TokenType::CodeTextData;
+            tokenizer.events[tail_index].token_type = TokenType::CodeTextData;
+            between(tokenizer, code, size_open)
+        }
+    }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 14f53a0..1fa57d5 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -23,7 +23,7 @@
 //! *   [character reference][character_reference]
 //! *   [code (fenced)][code_fenced]
 //! *   [code (indented)][code_indented]
-//! *   code (text)
+//! *   [code (text)][code_text]
 //! *   content
 //! *   definition
 //! *   hard break escape
@@ -59,6 +59,7 @@ pub mod character_escape;
 pub mod character_reference;
 pub mod code_fenced;
 pub mod code_indented;
+pub mod code_text;
 pub mod heading_atx;
 pub mod html_flow;
 pub mod html_text;
diff --git a/src/content/text.rs b/src/content/text.rs
index 433d030..9d510cb 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -9,7 +9,7 @@
 //! *   Attention
 //! *   [HTML (text)][crate::construct::html_text]
 //! *   Hard break escape
-//! *   Code (text)
+//! *   [Code (text)][crate::construct::code_text]
 //! *   Line ending
 //! *   Label start (image)
 //! *   Label start (link)
@@ -18,7 +18,8 @@
 
 use crate::construct::{
     autolink::start as autolink, character_escape::start as character_escape,
-    character_reference::start as character_reference, html_text::start as html_text,
+    character_reference::start as character_reference, code_text::start as code_text,
+    html_text::start as html_text,
 };
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -34,11 +35,12 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Ok, None),
-        _ => tokenizer.attempt_4(
+        _ => tokenizer.attempt_5(
             character_reference,
             character_escape,
             autolink,
             html_text,
+            code_text,
             |ok| Box::new(if ok { start } else { before_data }),
         )(tokenizer, code),
     }
@@ -80,7 +82,7 @@ fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             (State::Ok, None)
         }
         // To do: somehow get these markers from constructs.
-        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '&' | '\\' | '<') => {
+        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '&' | '<' | '\\' | '`') => {
             tokenizer.exit(TokenType::Data);
             start(tokenizer, code)
         }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 486bc75..c5df42b 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -51,6 +51,11 @@ pub enum TokenType {
     CodeIndented,
     CodeIndentedPrefixWhitespace,
 
+    CodeText,
+    CodeTextSequence,
+    CodeTextLineEnding,
+    CodeTextData,
+
     CodeFlowChunk,
 
     Data,
@@ -159,6 +164,8 @@ struct InternalState {
     events_len: usize,
     /// Length of the stack. It’s not allowed to decrease the stack in a check or an attempt.
     stack_len: usize,
+    /// Previous code.
+    previous: Code,
     /// Current code.
     current: Code,
     /// `index` in codes of the current code.
@@ -182,6 +189,8 @@ pub struct Tokenizer {
     ///
     /// Tracked to make sure everything’s valid.
     stack: Vec<TokenType>,
+    /// Previous character code.
+    pub previous: Code,
     /// Current character code.
     current: Code,
     /// `index` in codes of the current code.
@@ -194,6 +203,7 @@ impl Tokenizer {
     /// Create a new tokenizer.
     pub fn new(point: Point, index: usize) -> Tokenizer {
         Tokenizer {
+            previous: Code::None,
             current: Code::None,
             column_start: HashMap::new(),
             index,
@@ -218,7 +228,6 @@ impl Tokenizer {
     }
 
     fn account_for_potential_skip(&mut self) {
-        println!("account?: {:?} {:?}", self.point, self.index);
         match self.column_start.get(&self.point.line) {
             None => {}
             Some(next_column) => {
@@ -227,7 +236,6 @@ impl Tokenizer {
                     self.point.column = col;
                     self.point.offset += col - 1;
                     self.index += col - 1;
-                    println!("account! {:?} {:?}", self.point, self.index);
                 }
             }
         };
@@ -266,6 +274,7 @@ impl Tokenizer {
         }
 
         self.index += 1;
+        self.previous = code;
         // Mark as consumed.
         self.consumed = true;
     }
@@ -321,6 +330,7 @@ impl Tokenizer {
     fn capture(&mut self) -> InternalState {
         InternalState {
             index: self.index,
+            previous: self.previous,
             current: self.current,
             point: self.point.clone(),
             events_len: self.events.len(),
@@ -331,6 +341,7 @@ impl Tokenizer {
     /// Apply the internal state.
     fn free(&mut self, previous: InternalState) {
         self.index = previous.index;
+        self.previous = previous.previous;
         self.current = previous.current;
         self.point = previous.point;
         assert!(
@@ -429,6 +440,7 @@ impl Tokenizer {
             Some(Box::new(b)),
             None,
             None,
+            None,
             done,
         )
     }
@@ -446,16 +458,19 @@ impl Tokenizer {
             Some(Box::new(b)),
             Some(Box::new(c)),
             None,
+            None,
             done,
         )
     }
 
-    pub fn attempt_4(
+    #[allow(clippy::many_single_char_names)]
+    pub fn attempt_5(
         &mut self,
         a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        e: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         done: impl FnOnce(bool) -> Box<StateFn> + 'static,
     ) -> Box<StateFn> {
         self.call_multiple(
@@ -464,10 +479,12 @@ impl Tokenizer {
             Some(Box::new(b)),
             Some(Box::new(c)),
             Some(Box::new(d)),
+            Some(Box::new(e)),
             done,
         )
     }
 
+    #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn call_multiple(
         &mut self,
         check: bool,
@@ -475,6 +492,7 @@ impl Tokenizer {
         b: Option<Box<StateFn>>,
         c: Option<Box<StateFn>>,
         d: Option<Box<StateFn>>,
+        e: Option<Box<StateFn>>,
         done: impl FnOnce(bool) -> Box<StateFn> + 'static,
     ) -> Box<StateFn> {
         if let Some(head) = a {
@@ -483,7 +501,7 @@ impl Tokenizer {
                     done(ok)
                 } else {
                     Box::new(move |tokenizer: &mut Tokenizer, code| {
-                        tokenizer.call_multiple(check, b, c, d, None, done)(tokenizer, code)
+                        tokenizer.call_multiple(check, b, c, d, e, None, done)(tokenizer, code)
                     })
                 }
             };
@@ -640,7 +658,6 @@ pub fn as_codes(value: &str) -> Vec<Code> {
                 '\t' => {
                     // To do: is this correct?
                     let virtual_spaces = TAB_SIZE - (column % TAB_SIZE);
-                    println!("tabs, expand {:?}, {:?}", column, virtual_spaces);
                     codes.push(Code::Char(char));
                     column += 1;
                     let mut index = 0;
diff --git a/tests/character_escape.rs b/tests/character_escape.rs
index 5fdc445..aae0b58 100644
--- a/tests/character_escape.rs
+++ b/tests/character_escape.rs
@@ -37,12 +37,11 @@ fn character_escape() {
     //     "should escape a line break"
     // );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("`` \\[\\` ``"),
-    //     "<p><code>\\[\\`</code></p>",
-    //     "should not escape in text code"
-    // );
+    assert_eq!(
+        micromark("`` \\[\\` ``"),
+        "<p><code>\\[\\`</code></p>",
+        "should not escape in text code"
+    );
 
     assert_eq!(
         micromark("    \\[\\]"),
diff --git a/tests/character_reference.rs b/tests/character_reference.rs
index 5e71792..e351088 100644
--- a/tests/character_reference.rs
+++ b/tests/character_reference.rs
@@ -74,12 +74,11 @@ fn character_reference() {
         "should support character references in code language"
     );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("`f&ouml;&ouml;`"),
-    //     "<p><code>f&amp;ouml;&amp;ouml;</code></p>",
-    //     "should not support character references in text code"
-    // );
+    assert_eq!(
+        micromark("`f&ouml;&ouml;`"),
+        "<p><code>f&amp;ouml;&amp;ouml;</code></p>",
+        "should not support character references in text code"
+    );
 
     assert_eq!(
         micromark("    f&ouml;f&ouml;"),
diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs
index 6419f67..82ac088 100644
--- a/tests/code_fenced.rs
+++ b/tests/code_fenced.rs
@@ -15,12 +15,11 @@ fn code_fenced() {
         "should support fenced code w/ tildes"
     );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("``\nfoo\n``"),
-    //     "<p><code>foo</code></p>",
-    //     "should not support fenced code w/ less than three markers"
-    // );
+    assert_eq!(
+        micromark("``\nfoo\n``"),
+        "<p><code>foo</code></p>",
+        "should not support fenced code w/ less than three markers"
+    );
 
     assert_eq!(
         micromark("```\naaa\n~~~\n```"),
@@ -119,12 +118,11 @@ fn code_fenced() {
         "should not support an indented closing sequence w/ 4 spaces"
     );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("``` ```\naaa"),
-    //     "<p><code> </code>\naaa</p>",
-    //     "should not support grave accents in the opening fence after the opening sequence"
-    // );
+    assert_eq!(
+        micromark("``` ```\naaa"),
+        "<p><code> </code>\naaa</p>",
+        "should not support grave accents in the opening fence after the opening sequence"
+    );
 
     assert_eq!(
         micromark("~~~~~~\naaa\n~~~ ~~\n"),
@@ -163,12 +161,11 @@ fn code_fenced() {
         "should support the info string as a `language-` class, but not the meta string"
     );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("``` aa ```\nfoo"),
-    //     "<p><code>aa</code>\nfoo</p>",
-    //     "should not support grave accents in the meta string"
-    // );
+    assert_eq!(
+        micromark("``` aa ```\nfoo"),
+        "<p><code>aa</code>\nfoo</p>",
+        "should not support grave accents in the meta string"
+    );
 
     assert_eq!(
         micromark("~~~ aa ``` ~~~\nfoo\n~~~"),
diff --git a/tests/code_text.rs b/tests/code_text.rs
new file mode 100644
index 0000000..bab6dd6
--- /dev/null
+++ b/tests/code_text.rs
@@ -0,0 +1,162 @@
+extern crate micromark;
+use micromark::{micromark, micromark_with_options, CompileOptions};
+
+const DANGER: &CompileOptions = &CompileOptions {
+    allow_dangerous_html: true,
+    allow_dangerous_protocol: false,
+};
+
+#[test]
+fn code_text() {
+    assert_eq!(
+        micromark("`foo`"),
+        "<p><code>foo</code></p>",
+        "should support code"
+    );
+
+    assert_eq!(
+        micromark("`` foo ` bar ``"),
+        "<p><code>foo ` bar</code></p>",
+        "should support code w/ more accents"
+    );
+
+    assert_eq!(
+        micromark("` `` `"),
+        "<p><code>``</code></p>",
+        "should support code w/ fences inside, and padding"
+    );
+
+    assert_eq!(
+        micromark("`  ``  `"),
+        "<p><code> `` </code></p>",
+        "should support code w/ extra padding"
+    );
+
+    assert_eq!(
+        micromark("` a`"),
+        "<p><code> a</code></p>",
+        "should support code w/ unbalanced padding"
+    );
+
+    assert_eq!(
+        micromark("`\u{a0}b\u{a0}`"),
+        "<p><code>\u{a0}b\u{a0}</code></p>",
+        "should support code w/ non-padding whitespace"
+    );
+
+    assert_eq!(
+        micromark("` `\n`  `"),
+        "<p><code> </code>\n<code>  </code></p>",
+        "should support code w/o data"
+    );
+
+    assert_eq!(
+        micromark("``\nfoo\nbar  \nbaz\n``"),
+        "<p><code>foo bar   baz</code></p>",
+        "should support code w/o line endings (1)"
+    );
+
+    assert_eq!(
+        micromark("``\nfoo \n``"),
+        "<p><code>foo </code></p>",
+        "should support code w/o line endings (2)"
+    );
+
+    assert_eq!(
+        micromark("`foo   bar \nbaz`"),
+        "<p><code>foo   bar  baz</code></p>",
+        "should not support whitespace collapsing"
+    );
+
+    assert_eq!(
+        micromark("`foo\\`bar`"),
+        "<p><code>foo\\</code>bar`</p>",
+        "should not support character escapes"
+    );
+
+    assert_eq!(
+        micromark("``foo`bar``"),
+        "<p><code>foo`bar</code></p>",
+        "should support more accents"
+    );
+
+    assert_eq!(
+        micromark("` foo `` bar `"),
+        "<p><code>foo `` bar</code></p>",
+        "should support less accents"
+    );
+
+    assert_eq!(
+        micromark("*foo`*`"),
+        "<p>*foo<code>*</code></p>",
+        "should precede over emphasis"
+    );
+
+    assert_eq!(
+        micromark("[not a `link](/foo`)"),
+        "<p>[not a <code>link](/foo</code>)</p>",
+        "should precede over links"
+    );
+
+    assert_eq!(
+        micromark("`<a href=\"`\">`"),
+        "<p><code>&lt;a href=&quot;</code>&quot;&gt;`</p>",
+        "should have same precedence as HTML (1)"
+    );
+
+    assert_eq!(
+        micromark_with_options("<a href=\"`\">`", DANGER),
+        "<p><a href=\"`\">`</p>",
+        "should have same precedence as HTML (2)"
+    );
+
+    assert_eq!(
+        micromark("`<http://foo.bar.`baz>`"),
+        "<p><code>&lt;http://foo.bar.</code>baz&gt;`</p>",
+        "should have same precedence as autolinks (1)"
+    );
+
+    assert_eq!(
+        micromark("<http://foo.bar.`baz>`"),
+        "<p><a href=\"http://foo.bar.%60baz\">http://foo.bar.`baz</a>`</p>",
+        "should have same precedence as autolinks (2)"
+    );
+
+    assert_eq!(
+        micromark("```foo``"),
+        "<p>```foo``</p>",
+        "should not support more accents before a fence"
+    );
+
+    assert_eq!(
+        micromark("`foo"),
+        "<p>`foo</p>",
+        "should not support no closing fence (1)"
+    );
+
+    assert_eq!(
+        micromark("`foo``bar``"),
+        "<p>`foo<code>bar</code></p>",
+        "should not support no closing fence (2)"
+    );
+
+    // Extra:
+    assert_eq!(
+        micromark("`foo\t\tbar`"),
+        "<p><code>foo\t\tbar</code></p>",
+        "should support tabs in code"
+    );
+
+    assert_eq!(
+        micromark("\\``x`"),
+        "<p>`<code>x</code></p>",
+        "should support an escaped initial grave accent"
+    );
+
+    // To do: turning things off.
+    // assert_eq!(
+    //   micromark("`a`", {extensions: [{disable: {null: ["codeText"]}}]}),
+    //   "<p>`a`</p>",
+    //   "should support turning off code (text)"
+    // );
+}
diff --git a/tests/misc_tabs.rs b/tests/misc_tabs.rs
index bdd88cf..0d05be9 100644
--- a/tests/misc_tabs.rs
+++ b/tests/misc_tabs.rs
@@ -181,40 +181,35 @@ fn tabs_text() {
         "should support character reference resolving to a tab"
     );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("`\tx`"),
-    //     "<p><code>\tx</code></p>",
-    //     "should support a tab starting code"
-    // );
+    assert_eq!(
+        micromark("`\tx`"),
+        "<p><code>\tx</code></p>",
+        "should support a tab starting code"
+    );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("`x\t`"),
-    //     "<p><code>x\t</code></p>",
-    //     "should support a tab ending code"
-    // );
+    assert_eq!(
+        micromark("`x\t`"),
+        "<p><code>x\t</code></p>",
+        "should support a tab ending code"
+    );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("`\tx\t`"),
-    //     "<p><code>\tx\t</code></p>",
-    //     "should support tabs around code"
-    // );
+    assert_eq!(
+        micromark("`\tx\t`"),
+        "<p><code>\tx\t</code></p>",
+        "should support tabs around code"
+    );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("`\tx `"),
-    //     "<p><code>\tx </code></p>",
-    //     "should support a tab starting, and a space ending, code"
-    // );
+    assert_eq!(
+        micromark("`\tx `"),
+        "<p><code>\tx </code></p>",
+        "should support a tab starting, and a space ending, code"
+    );
 
-    // To do: code (text).
-    // assert_eq!(
-    //     micromark("` x\t`"),
-    //     "<p><code> x\t</code></p>",
-    //     "should support a space starting, and a tab ending, code"
-    // );
+    assert_eq!(
+        micromark("` x\t`"),
+        "<p><code> x\t</code></p>",
+        "should support a space starting, and a tab ending, code"
+    );
 
     // To do: trim trailing whitespace.
     // // Note: CM does not strip it in this case.
author	Titus Wormer <tituswormer@gmail.com>	2022-06-15 18:17:01 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-06-15 18:17:01 +0200
commit	acc35758778bfda5cb01951533868eb8baa2e2d2 (patch)
tree	82ded1c1f88d04a1c37e40822ad7e5a6bba8a717
parent	75dcb48f78a8a798fde525d2d39e20cffec48e50 (diff)
download	markdown-rs-acc35758778bfda5cb01951533868eb8baa2e2d2.tar.gz markdown-rs-acc35758778bfda5cb01951533868eb8baa2e2d2.tar.bz2 markdown-rs-acc35758778bfda5cb01951533868eb8baa2e2d2.zip