diff options
-rw-r--r-- | readme.md | 4 | ||||
-rw-r--r-- | src/compiler.rs | 42 | ||||
-rw-r--r-- | src/construct/code_fenced.rs | 13 | ||||
-rw-r--r-- | src/construct/code_indented.rs | 12 | ||||
-rw-r--r-- | src/construct/code_text.rs | 217 | ||||
-rw-r--r-- | src/construct/mod.rs | 3 | ||||
-rw-r--r-- | src/content/text.rs | 10 | ||||
-rw-r--r-- | src/tokenizer.rs | 27 | ||||
-rw-r--r-- | tests/character_escape.rs | 11 | ||||
-rw-r--r-- | tests/character_reference.rs | 11 | ||||
-rw-r--r-- | tests/code_fenced.rs | 33 | ||||
-rw-r--r-- | tests/code_text.rs | 162 | ||||
-rw-r--r-- | tests/misc_tabs.rs | 55 |
13 files changed, 507 insertions, 93 deletions
@@ -109,7 +109,7 @@ cargo doc --document-private-items - [x] character reference - [x] code (fenced) - [x] code (indented) -- [ ] (1) code (text) +- [x] (1) code (text) - [ ] (3) content - [ ] (3) definition - [ ] (1) hard break escape @@ -146,7 +146,7 @@ cargo doc --document-private-items - [x] autolink - [x] character escape - [x] character reference - - [ ] code (text) + - [x] code (text) - [ ] hard break escape - [x] html (text) - [ ] label end diff --git a/src/compiler.rs b/src/compiler.rs index 6f0215c..d3d935b 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -96,8 +96,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St // let mut slurp_all_line_endings = false; - println!("events: {:#?}", events); - while index < events.len() { let event = &events[index]; let token_type = &event.token_type; @@ -133,6 +131,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St ignore_encode = true; } } + TokenType::CodeText => { + buf_tail_mut(buffers).push("<code>".to_string()); + buffer(buffers); + } TokenType::Content | TokenType::AtxHeading | TokenType::AtxHeadingSequence @@ -152,6 +154,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CodeFencedFence | TokenType::CodeFencedFenceSequence | TokenType::CodeFencedFenceWhitespace + | TokenType::CodeTextSequence + | TokenType::CodeTextData + | TokenType::CodeTextLineEnding | TokenType::Data | TokenType::CharacterEscape | TokenType::CharacterEscapeMarker @@ -181,6 +186,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::Whitespace | TokenType::CodeFencedFenceSequence | TokenType::CodeFencedFenceWhitespace + | TokenType::CodeTextSequence | TokenType::CharacterEscape | TokenType::CharacterEscapeMarker | TokenType::CharacterReference @@ -264,6 +270,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St false, ))); } + // 
`AtxHeadingWhitespace` is ignored after the opening sequence, // before the closing sequence, and after the closing sequence. // But it is used around intermediate sequences. @@ -290,7 +297,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St } } TokenType::AtxHeadingText => { - println!("text: {:?}", atx_heading_buffer); if let Some(ref buf) = atx_heading_buffer { if !buf.is_empty() { buf_tail_mut(buffers).push(encode(buf)); @@ -301,7 +307,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St } let slice = encode(&serialize(codes, &from_exit_event(events, index), false)); - println!("slice: {:?}", slice); buf_tail_mut(buffers).push(slice); } TokenType::AtxHeading => { @@ -340,8 +345,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St // } else if slurp_one_line_ending { slurp_one_line_ending = false; - // } else if code_text_inside { - // buf_tail_mut(buffers).push(" ".to_string()); } else { buf_tail_mut(buffers).push(encode(&serialize( codes, @@ -378,8 +381,33 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St character_reference_kind = None; } + TokenType::CodeText => { + let result = resume(buffers); + let mut chars = result.chars(); + let mut trim = false; + + if Some(' ') == chars.next() && Some(' ') == chars.next_back() { + let mut next = chars.next(); + while next != None && !trim { + if Some(' ') != next { + trim = true; + } + next = chars.next(); + } + } + + buf_tail_mut(buffers).push(if trim { + result[1..(result.len() - 1)].to_string() + } else { + result + }); + buf_tail_mut(buffers).push("</code>".to_string()); + } + TokenType::CodeTextLineEnding => { + buf_tail_mut(buffers).push(" ".to_string()); + } // This branch below currently acts as the resulting `data` tokens. 
- TokenType::Data | TokenType::CharacterEscapeValue => { + TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => { // last_was_tag = false; buf_tail_mut(buffers).push(encode(&serialize( codes, diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index c852e8d..12c8bd6 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -66,10 +66,10 @@ //! The `info` and `meta` parts are interpreted as the [string][] content type. //! That means that character escapes and character reference are allowed. //! -//! In markdown, it is also possible to use code (text) in the [text][] content -//! type. +//! In markdown, it is also possible to use [code (text)][code_text] in the +//! [text][] content type. //! It is also possible to create code with the -//! [code (indented)][code-indented] construct. +//! [code (indented)][code_indented] construct. //! That construct is less explicit, different from code (text), and has no //! support for specifying the programming language, so it is recommended to //! use code (fenced) instead of code (indented). @@ -82,11 +82,10 @@ //! [flow]: crate::content::flow //! [string]: crate::content::string //! [text]: crate::content::text -//! [code-indented]: crate::construct::code_indented +//! [code_indented]: crate::construct::code_indented +//! [code_text]: crate::construct::code_text //! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element -//! -//! 
<!-- To do: link `code_text` --> use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE}; use crate::construct::partial_whitespace::start as whitespace; @@ -251,14 +250,12 @@ fn info_inside( ) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - println!("to do: subtokenize: {:?}", codes); tokenizer.exit(TokenType::ChunkString); tokenizer.exit(TokenType::CodeFencedFenceInfo); tokenizer.exit(TokenType::CodeFencedFence); at_break(tokenizer, info, code) } Code::VirtualSpace | Code::Char('\t' | ' ') => { - println!("to do: subtokenize: {:?}", codes); tokenizer.exit(TokenType::ChunkString); tokenizer.exit(TokenType::CodeFencedFenceInfo); tokenizer.attempt( diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 936f174..55b8901 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -18,9 +18,9 @@ //! See [*§ 4.4.3 The `pre` element*][html-pre] and the [*§ 4.5.15 The `code` //! element*][html-code] in the HTML spec for more info. //! -//! In markdown, it is also possible to use code (text) in the text content -//! type. -//! It is also possible to create code with the [code (fenced)][code-fenced] +//! In markdown, it is also possible to use [code (text)][code_text] in the +//! [text][] content type. +//! It is also possible to create code with the [code (fenced)][code_fenced] //! construct. //! That construct is more explicit, more similar to code (text), and has //! support for specifying the programming language that the code is in, so it @@ -32,11 +32,11 @@ //! * [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#indented-code-blocks) //! //! [flow]: crate::content::flow -//! [code-fenced]: crate::construct::code_fenced +//! [text]: crate::content::text +//! [code_text]: crate::construct::code_text +//! [code_fenced]: crate::construct::code_fenced //! 
[html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element -//! -//! <!-- To do: link `code_text` --> use crate::constant::TAB_SIZE; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs new file mode 100644 index 0000000..3c01070 --- /dev/null +++ b/src/construct/code_text.rs @@ -0,0 +1,217 @@ +//! Code (text) is a construct that occurs in the [text][] content type. +//! +//! It forms with the following BNF: +//! +//! ```bnf +//! ; Restriction: the number of markers in the closing sequence must be equal +//! ; to the number of markers in the opening sequence. +//! code_text ::= sequence 1*code sequence +//! +//! sequence ::= 1*'`' +//! ``` +//! +//! The above grammar shows that it is not possible to create empty code. +//! It is possible to include grave accents (ticks) in code, by wrapping it +//! in bigger or smaller sequences: +//! +//! ```markdown +//! Include more: `a``b` or include less: ``a`b``. +//! ``` +//! +//! When turning markdown into HTML, each line ending is turned into a space. +//! +//! It is also possible to include just one grave accent (tick): +//! +//! ```markdown +//! Include just one: `` ` ``. +//! ``` +//! +//! Sequences are “greedy”, in that they cannot be preceded or succeeded by +//! more grave accents (ticks). +//! To illustrate: +//! +//! ```markdown +//! Not code: ``x`. +//! +//! Not code: `x``. +//! +//! Escapes work, this is code: \``x`. +//! +//! Escapes work, this is code: `x`\`. +//! ``` +//! +//! Yields: +//! +//! ```html +//! <p>Not code: ``x`.</p> +//! <p>Not code: `x``.</p> +//! <p>Escapes work, this is code: `<code>x</code>.</p> +//! <p>Escapes work, this is code: <code>x</code>`.</p> +//! ``` +//! +//! That is because, when turning markdown into HTML, the first and last space, +//! 
if both exist and there is also a non-space in the code, are removed. +//! Line endings, at that stage, are considered as spaces. +//! +//! Code (text) relates to the `<code>` element in HTML. +//! See [*§ 4.5.15 The `code` element*][html-code] in the HTML spec for more +//! info. +//! +//! In markdown, it is possible to create code with the +//! [code (fenced)][code_fenced] or [code (indented)][code_indented] constructs +//! in the [flow][] content type. +//! Compared to code (indented), fenced code is more explicit and more similar +//! to code (text), and it has support for specifying the programming language +//! that the code is in, so it is recommended to use that instead of indented +//! code. +//! +//! ## References +//! +//! * [`code-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-text.js) +//! * [*§ 6.1 Code spans* in `CommonMark`](https://spec.commonmark.org/0.30/#code-spans) +//! +//! [flow]: crate::content::flow +//! [text]: crate::content::text +//! [code_indented]: crate::construct::code_indented +//! [code_fenced]: crate::construct::code_fenced +//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of code (text). +/// +/// ```markdown +/// |`a` +/// +/// |\``a` +/// +/// |``a` +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let len = tokenizer.events.len(); + + match code { + Code::Char('`') + if tokenizer.previous != Code::Char('`') + || (len > 0 + && tokenizer.events[len - 1].token_type == TokenType::CharacterEscape) => + { + tokenizer.enter(TokenType::CodeText); + tokenizer.enter(TokenType::CodeTextSequence); + sequence_open(tokenizer, code, 0) + } + _ => (State::Nok, None), + } +} + +/// In the opening sequence. 
+/// +/// ```markdown +/// `|`a`` +/// ``` +pub fn sequence_open(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { + if let Code::Char('`') = code { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |tokenizer, code| { + sequence_open(tokenizer, code, size + 1) + })), + None, + ) + } else { + tokenizer.exit(TokenType::CodeTextSequence); + between(tokenizer, code, size) + } +} + +/// Between something and something else +/// +/// ```markdown +/// `|a` +/// `a|` +/// ``` +pub fn between(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResult { + match code { + Code::None => (State::Nok, None), + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.enter(TokenType::CodeTextLineEnding); + tokenizer.consume(code); + tokenizer.exit(TokenType::CodeTextLineEnding); + ( + State::Fn(Box::new(move |tokenizer, code| { + between(tokenizer, code, size_open) + })), + None, + ) + } + Code::Char('`') => { + tokenizer.enter(TokenType::CodeTextSequence); + sequence_close(tokenizer, code, size_open, 0) + } + _ => { + tokenizer.enter(TokenType::CodeTextData); + data(tokenizer, code, size_open) + } + } +} + +/// In data. +/// +/// ```markdown +/// `a|b` +/// ``` +pub fn data(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '`') => { + tokenizer.exit(TokenType::CodeTextData); + between(tokenizer, code, size_open) + } + _ => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |tokenizer, code| { + data(tokenizer, code, size_open) + })), + None, + ) + } + } +} + +/// In the closing sequence. 
+/// +/// ```markdown +/// ``a`|` +/// ``` +pub fn sequence_close( + tokenizer: &mut Tokenizer, + code: Code, + size_open: usize, + size: usize, +) -> StateFnResult { + match code { + Code::Char('`') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |tokenizer, code| { + sequence_close(tokenizer, code, size_open, size + 1) + })), + None, + ) + } + _ if size_open == size => { + tokenizer.exit(TokenType::CodeTextSequence); + tokenizer.exit(TokenType::CodeText); + (State::Ok, Some(vec![code])) + } + _ => { + let tail_index = tokenizer.events.len(); + let head_index = tokenizer.events.len() - 1; + tokenizer.exit(TokenType::CodeTextSequence); + // Change the token type. + tokenizer.events[head_index].token_type = TokenType::CodeTextData; + tokenizer.events[tail_index].token_type = TokenType::CodeTextData; + between(tokenizer, code, size_open) + } + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 14f53a0..1fa57d5 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -23,7 +23,7 @@ //! * [character reference][character_reference] //! * [code (fenced)][code_fenced] //! * [code (indented)][code_indented] -//! * code (text) +//! * [code (text)][code_text] //! * content //! * definition //! * hard break escape @@ -59,6 +59,7 @@ pub mod character_escape; pub mod character_reference; pub mod code_fenced; pub mod code_indented; +pub mod code_text; pub mod heading_atx; pub mod html_flow; pub mod html_text; diff --git a/src/content/text.rs b/src/content/text.rs index 433d030..9d510cb 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -9,7 +9,7 @@ //! * Attention //! * [HTML (text)][crate::construct::html_text] //! * Hard break escape -//! * Code (text) +//! * [Code (text)][crate::construct::code_text] //! * Line ending //! * Label start (image) //! 
* Label start (link) @@ -18,7 +18,8 @@ use crate::construct::{ autolink::start as autolink, character_escape::start as character_escape, - character_reference::start as character_reference, html_text::start as html_text, + character_reference::start as character_reference, code_text::start as code_text, + html_text::start as html_text, }; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -34,11 +35,12 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Ok, None), - _ => tokenizer.attempt_4( + _ => tokenizer.attempt_5( character_reference, character_escape, autolink, html_text, + code_text, |ok| Box::new(if ok { start } else { before_data }), )(tokenizer, code), } @@ -80,7 +82,7 @@ fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { (State::Ok, None) } // To do: somehow get these markers from constructs. - Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '&' | '\\' | '<') => { + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '&' | '<' | '\\' | '`') => { tokenizer.exit(TokenType::Data); start(tokenizer, code) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 486bc75..c5df42b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -51,6 +51,11 @@ pub enum TokenType { CodeIndented, CodeIndentedPrefixWhitespace, + CodeText, + CodeTextSequence, + CodeTextLineEnding, + CodeTextData, + CodeFlowChunk, Data, @@ -159,6 +164,8 @@ struct InternalState { events_len: usize, /// Length of the stack. It’s not allowed to decrease the stack in a check or an attempt. stack_len: usize, + /// Previous code. + previous: Code, /// Current code. current: Code, /// `index` in codes of the current code. @@ -182,6 +189,8 @@ pub struct Tokenizer { /// /// Tracked to make sure everything’s valid. stack: Vec<TokenType>, + /// Previous character code. + pub previous: Code, /// Current character code. 
current: Code, /// `index` in codes of the current code. @@ -194,6 +203,7 @@ impl Tokenizer { /// Create a new tokenizer. pub fn new(point: Point, index: usize) -> Tokenizer { Tokenizer { + previous: Code::None, current: Code::None, column_start: HashMap::new(), index, @@ -218,7 +228,6 @@ impl Tokenizer { } fn account_for_potential_skip(&mut self) { - println!("account?: {:?} {:?}", self.point, self.index); match self.column_start.get(&self.point.line) { None => {} Some(next_column) => { @@ -227,7 +236,6 @@ impl Tokenizer { self.point.column = col; self.point.offset += col - 1; self.index += col - 1; - println!("account! {:?} {:?}", self.point, self.index); } } }; @@ -266,6 +274,7 @@ impl Tokenizer { } self.index += 1; + self.previous = code; // Mark as consumed. self.consumed = true; } @@ -321,6 +330,7 @@ impl Tokenizer { fn capture(&mut self) -> InternalState { InternalState { index: self.index, + previous: self.previous, current: self.current, point: self.point.clone(), events_len: self.events.len(), @@ -331,6 +341,7 @@ impl Tokenizer { /// Apply the internal state. 
fn free(&mut self, previous: InternalState) { self.index = previous.index; + self.previous = previous.previous; self.current = previous.current; self.point = previous.point; assert!( @@ -429,6 +440,7 @@ impl Tokenizer { Some(Box::new(b)), None, None, + None, done, ) } @@ -446,16 +458,19 @@ impl Tokenizer { Some(Box::new(b)), Some(Box::new(c)), None, + None, done, ) } - pub fn attempt_4( + #[allow(clippy::many_single_char_names)] + pub fn attempt_5( &mut self, a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + e: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, done: impl FnOnce(bool) -> Box<StateFn> + 'static, ) -> Box<StateFn> { self.call_multiple( @@ -464,10 +479,12 @@ impl Tokenizer { Some(Box::new(b)), Some(Box::new(c)), Some(Box::new(d)), + Some(Box::new(e)), done, ) } + #[allow(clippy::too_many_arguments, clippy::many_single_char_names)] pub fn call_multiple( &mut self, check: bool, @@ -475,6 +492,7 @@ impl Tokenizer { b: Option<Box<StateFn>>, c: Option<Box<StateFn>>, d: Option<Box<StateFn>>, + e: Option<Box<StateFn>>, done: impl FnOnce(bool) -> Box<StateFn> + 'static, ) -> Box<StateFn> { if let Some(head) = a { @@ -483,7 +501,7 @@ impl Tokenizer { done(ok) } else { Box::new(move |tokenizer: &mut Tokenizer, code| { - tokenizer.call_multiple(check, b, c, d, None, done)(tokenizer, code) + tokenizer.call_multiple(check, b, c, d, e, None, done)(tokenizer, code) }) } }; @@ -640,7 +658,6 @@ pub fn as_codes(value: &str) -> Vec<Code> { '\t' => { // To do: is this correct? 
let virtual_spaces = TAB_SIZE - (column % TAB_SIZE); - println!("tabs, expand {:?}, {:?}", column, virtual_spaces); codes.push(Code::Char(char)); column += 1; let mut index = 0; diff --git a/tests/character_escape.rs b/tests/character_escape.rs index 5fdc445..aae0b58 100644 --- a/tests/character_escape.rs +++ b/tests/character_escape.rs @@ -37,12 +37,11 @@ fn character_escape() { // "should escape a line break" // ); - // To do: code (text). - // assert_eq!( - // micromark("`` \\[\\` ``"), - // "<p><code>\\[\\`</code></p>", - // "should not escape in text code" - // ); + assert_eq!( + micromark("`` \\[\\` ``"), + "<p><code>\\[\\`</code></p>", + "should not escape in text code" + ); assert_eq!( micromark(" \\[\\]"), diff --git a/tests/character_reference.rs b/tests/character_reference.rs index 5e71792..e351088 100644 --- a/tests/character_reference.rs +++ b/tests/character_reference.rs @@ -74,12 +74,11 @@ fn character_reference() { "should support character references in code language" ); - // To do: code (text). - // assert_eq!( - // micromark("`föö`"), - // "<p><code>f&ouml;&ouml;</code></p>", - // "should not support character references in text code" - // ); + assert_eq!( + micromark("`föö`"), + "<p><code>f&ouml;&ouml;</code></p>", + "should not support character references in text code" + ); assert_eq!( micromark(" föfö"), diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs index 6419f67..82ac088 100644 --- a/tests/code_fenced.rs +++ b/tests/code_fenced.rs @@ -15,12 +15,11 @@ fn code_fenced() { "should support fenced code w/ tildes" ); - // To do: code (text). 
- // assert_eq!( - // micromark("``\nfoo\n``"), - // "<p><code>foo</code></p>", - // "should not support fenced code w/ less than three markers" - // ); + assert_eq!( + micromark("``\nfoo\n``"), + "<p><code>foo</code></p>", + "should not support fenced code w/ less than three markers" + ); assert_eq!( micromark("```\naaa\n~~~\n```"), @@ -119,12 +118,11 @@ fn code_fenced() { "should not support an indented closing sequence w/ 4 spaces" ); - // To do: code (text). - // assert_eq!( - // micromark("``` ```\naaa"), - // "<p><code> </code>\naaa</p>", - // "should not support grave accents in the opening fence after the opening sequence" - // ); + assert_eq!( + micromark("``` ```\naaa"), + "<p><code> </code>\naaa</p>", + "should not support grave accents in the opening fence after the opening sequence" + ); assert_eq!( micromark("~~~~~~\naaa\n~~~ ~~\n"), @@ -163,12 +161,11 @@ fn code_fenced() { "should support the info string as a `language-` class, but not the meta string" ); - // To do: code (text). 
- // assert_eq!( - // micromark("``` aa ```\nfoo"), - // "<p><code>aa</code>\nfoo</p>", - // "should not support grave accents in the meta string" - // ); + assert_eq!( + micromark("``` aa ```\nfoo"), + "<p><code>aa</code>\nfoo</p>", + "should not support grave accents in the meta string" + ); assert_eq!( micromark("~~~ aa ``` ~~~\nfoo\n~~~"), diff --git a/tests/code_text.rs b/tests/code_text.rs new file mode 100644 index 0000000..bab6dd6 --- /dev/null +++ b/tests/code_text.rs @@ -0,0 +1,162 @@ +extern crate micromark; +use micromark::{micromark, micromark_with_options, CompileOptions}; + +const DANGER: &CompileOptions = &CompileOptions { + allow_dangerous_html: true, + allow_dangerous_protocol: false, +}; + +#[test] +fn code_text() { + assert_eq!( + micromark("`foo`"), + "<p><code>foo</code></p>", + "should support code" + ); + + assert_eq!( + micromark("`` foo ` bar ``"), + "<p><code>foo ` bar</code></p>", + "should support code w/ more accents" + ); + + assert_eq!( + micromark("` `` `"), + "<p><code>``</code></p>", + "should support code w/ fences inside, and padding" + ); + + assert_eq!( + micromark("` `` `"), + "<p><code> `` </code></p>", + "should support code w/ extra padding" + ); + + assert_eq!( + micromark("` a`"), + "<p><code> a</code></p>", + "should support code w/ unbalanced padding" + ); + + assert_eq!( + micromark("`\u{a0}b\u{a0}`"), + "<p><code>\u{a0}b\u{a0}</code></p>", + "should support code w/ non-padding whitespace" + ); + + assert_eq!( + micromark("` `\n` `"), + "<p><code> </code>\n<code> </code></p>", + "should support code w/o data" + ); + + assert_eq!( + micromark("``\nfoo\nbar \nbaz\n``"), + "<p><code>foo bar baz</code></p>", + "should support code w/o line endings (1)" + ); + + assert_eq!( + micromark("``\nfoo \n``"), + "<p><code>foo </code></p>", + "should support code w/o line endings (2)" + ); + + assert_eq!( + micromark("`foo bar \nbaz`"), + "<p><code>foo bar baz</code></p>", + "should not support whitespace collapsing" + ); + + 
assert_eq!( + micromark("`foo\\`bar`"), + "<p><code>foo\\</code>bar`</p>", + "should not support character escapes" + ); + + assert_eq!( + micromark("``foo`bar``"), + "<p><code>foo`bar</code></p>", + "should support more accents" + ); + + assert_eq!( + micromark("` foo `` bar `"), + "<p><code>foo `` bar</code></p>", + "should support less accents" + ); + + assert_eq!( + micromark("*foo`*`"), + "<p>*foo<code>*</code></p>", + "should precede over emphasis" + ); + + assert_eq!( + micromark("[not a `link](/foo`)"), + "<p>[not a <code>link](/foo</code>)</p>", + "should precede over links" + ); + + assert_eq!( + micromark("`<a href=\"`\">`"), + "<p><code><a href="</code>">`</p>", + "should have same precedence as HTML (1)" + ); + + assert_eq!( + micromark_with_options("<a href=\"`\">`", DANGER), + "<p><a href=\"`\">`</p>", + "should have same precedence as HTML (2)" + ); + + assert_eq!( + micromark("`<http://foo.bar.`baz>`"), + "<p><code><http://foo.bar.</code>baz>`</p>", + "should have same precedence as autolinks (1)" + ); + + assert_eq!( + micromark("<http://foo.bar.`baz>`"), + "<p><a href=\"http://foo.bar.%60baz\">http://foo.bar.`baz</a>`</p>", + "should have same precedence as autolinks (2)" + ); + + assert_eq!( + micromark("```foo``"), + "<p>```foo``</p>", + "should not support more accents before a fence" + ); + + assert_eq!( + micromark("`foo"), + "<p>`foo</p>", + "should not support no closing fence (1)" + ); + + assert_eq!( + micromark("`foo``bar``"), + "<p>`foo<code>bar</code></p>", + "should not support no closing fence (2)" + ); + + // Extra: + assert_eq!( + micromark("`foo\t\tbar`"), + "<p><code>foo\t\tbar</code></p>", + "should support tabs in code" + ); + + assert_eq!( + micromark("\\``x`"), + "<p>`<code>x</code></p>", + "should support an escaped initial grave accent" + ); + + // To do: turning things off. 
+ // assert_eq!( + // micromark("`a`", {extensions: [{disable: {null: ["codeText"]}}]}), + // "<p>`a`</p>", + // "should support turning off code (text)" + // ); +} diff --git a/tests/misc_tabs.rs b/tests/misc_tabs.rs index bdd88cf..0d05be9 100644 --- a/tests/misc_tabs.rs +++ b/tests/misc_tabs.rs @@ -181,40 +181,35 @@ fn tabs_text() { "should support character reference resolving to a tab" ); - // To do: code (text). - // assert_eq!( - // micromark("`\tx`"), - // "<p><code>\tx</code></p>", - // "should support a tab starting code" - // ); + assert_eq!( + micromark("`\tx`"), + "<p><code>\tx</code></p>", + "should support a tab starting code" + ); - // To do: code (text). - // assert_eq!( - // micromark("`x\t`"), - // "<p><code>x\t</code></p>", - // "should support a tab ending code" - // ); + assert_eq!( + micromark("`x\t`"), + "<p><code>x\t</code></p>", + "should support a tab ending code" + ); - // To do: code (text). - // assert_eq!( - // micromark("`\tx\t`"), - // "<p><code>\tx\t</code></p>", - // "should support tabs around code" - // ); + assert_eq!( + micromark("`\tx\t`"), + "<p><code>\tx\t</code></p>", + "should support tabs around code" + ); - // To do: code (text). - // assert_eq!( - // micromark("`\tx `"), - // "<p><code>\tx </code></p>", - // "should support a tab starting, and a space ending, code" - // ); + assert_eq!( + micromark("`\tx `"), + "<p><code>\tx </code></p>", + "should support a tab starting, and a space ending, code" + ); - // To do: code (text). - // assert_eq!( - // micromark("` x\t`"), - // "<p><code> x\t</code></p>", - // "should support a space starting, and a tab ending, code" - // ); + assert_eq!( + micromark("` x\t`"), + "<p><code> x\t</code></p>", + "should support a space starting, and a tab ending, code" + ); // To do: trim trailing whitespace. // // Note: CM does not strip it in this case. |