diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/compiler.rs | 335 | ||||
| -rw-r--r-- | src/content/string.rs | 1 | ||||
| -rw-r--r-- | src/content/text.rs | 1 | ||||
| -rw-r--r-- | src/tokenizer.rs | 28 | 
4 files changed, 176 insertions, 189 deletions
| diff --git a/src/compiler.rs b/src/compiler.rs index 9f84a38..50c06e1 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -102,8 +102,51 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St          match event.event_type {              EventType::Enter => match token_type { -                TokenType::Paragraph => { -                    buf_tail_mut(buffers).push("<p>".to_string()); +                TokenType::AtxHeading +                | TokenType::AtxHeadingSequence +                | TokenType::AtxHeadingWhitespace +                | TokenType::Autolink +                | TokenType::AutolinkEmail +                | TokenType::AutolinkMarker +                | TokenType::AutolinkProtocol +                | TokenType::BlankLineEnding +                | TokenType::BlankLineWhitespace +                | TokenType::CharacterEscape +                | TokenType::CharacterEscapeMarker +                | TokenType::CharacterEscapeValue +                | TokenType::CharacterReference +                | TokenType::CharacterReferenceMarker +                | TokenType::CharacterReferenceMarkerHexadecimal +                | TokenType::CharacterReferenceMarkerNumeric +                | TokenType::CharacterReferenceMarkerSemi +                | TokenType::CharacterReferenceValue +                | TokenType::CodeIndentedPrefixWhitespace +                | TokenType::CodeFencedFence +                | TokenType::CodeFencedFenceSequence +                | TokenType::CodeFencedFenceWhitespace +                | TokenType::CodeFlowChunk +                | TokenType::CodeTextData +                | TokenType::CodeTextLineEnding +                | TokenType::CodeTextSequence +                | TokenType::Content +                | TokenType::Data +                | TokenType::HardBreakEscape +                | TokenType::HardBreakEscapeMarker +                | TokenType::HardBreakTrailing +                | TokenType::HardBreakTrailingSpace +                | TokenType::HtmlFlowData +                | TokenType::HtmlTextData +                | TokenType::LineEnding +                | TokenType::ThematicBreak +                | TokenType::ThematicBreakSequence +                | TokenType::ThematicBreakWhitespace +                | TokenType::Whitespace => { +                    // Ignore. +                } +                TokenType::AtxHeadingText +                | TokenType::CodeFencedFenceInfo +                | TokenType::CodeFencedFenceMeta => { +                    buffer(buffers);                  }                  TokenType::CodeIndented => {                      code_flow_seen_data = Some(false); @@ -117,9 +160,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                      buf_tail_mut(buffers).push("<pre><code".to_string());                      code_fenced_fences_count = Some(0);                  } -                TokenType::AtxHeadingText -                | TokenType::CodeFencedFenceInfo -                | TokenType::CodeFencedFenceMeta => { +                TokenType::CodeText => { +                    buf_tail_mut(buffers).push("<code>".to_string());                      buffer(buffers);                  }                  TokenType::HtmlFlow => { @@ -133,161 +175,58 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                          ignore_encode = true;                      }                  } -                TokenType::CodeText => { -                    buf_tail_mut(buffers).push("<code>".to_string()); -                    buffer(buffers); +                TokenType::Paragraph => { +                    buf_tail_mut(buffers).push("<p>".to_string());                  } -                TokenType::Content -                | TokenType::AtxHeading -                | TokenType::AtxHeadingSequence -                | TokenType::AtxHeadingWhitespace -                | TokenType::LineEnding -                | TokenType::ThematicBreak -                | TokenType::ThematicBreakSequence -                | TokenType::ThematicBreakWhitespace -                | TokenType::CodeIndentedPrefixWhitespace -                | TokenType::CodeFlowChunk -                | TokenType::BlankLineEnding -                | TokenType::BlankLineWhitespace -                | TokenType::Whitespace -                | TokenType::HardBreakEscape -                | TokenType::HardBreakEscapeMarker -                | TokenType::HardBreakTrailing -                | TokenType::HardBreakTrailingSpace -                | TokenType::HtmlFlowData -                | TokenType::HtmlTextData -                | TokenType::CodeFencedFence -                | TokenType::CodeFencedFenceSequence -                | TokenType::CodeFencedFenceWhitespace -                | TokenType::CodeTextSequence -                | TokenType::CodeTextData -                | TokenType::CodeTextLineEnding -                | TokenType::Data -                | TokenType::CharacterEscape -                | TokenType::CharacterEscapeMarker -                | TokenType::CharacterEscapeValue -                | TokenType::CharacterReference -                | TokenType::CharacterReferenceMarker -                | TokenType::CharacterReferenceMarkerNumeric -                | TokenType::CharacterReferenceMarkerHexadecimal -                | TokenType::CharacterReferenceMarkerSemi -                | TokenType::CharacterReferenceValue -                | TokenType::Autolink -                | TokenType::AutolinkMarker -                | TokenType::AutolinkProtocol -                | TokenType::AutolinkEmail => {}                  #[allow(unreachable_patterns)]                  _ => {                      unreachable!("unhandled `enter` of TokenType {:?}", token_type)                  }              },              EventType::Exit => match token_type { -                TokenType::Content -                | TokenType::ThematicBreakSequence -                | TokenType::ThematicBreakWhitespace -                | TokenType::CodeIndentedPrefixWhitespace +                TokenType::Autolink +                | TokenType::AutolinkMarker                  | TokenType::BlankLineEnding                  | TokenType::BlankLineWhitespace -                | TokenType::Whitespace -                | TokenType::CodeFencedFenceSequence -                | TokenType::CodeFencedFenceWhitespace -                | TokenType::CodeTextSequence                  | TokenType::CharacterEscape                  | TokenType::CharacterEscapeMarker                  | TokenType::CharacterReference                  | TokenType::CharacterReferenceMarkerSemi +                | TokenType::CodeFencedFenceSequence +                | TokenType::CodeFencedFenceWhitespace +                | TokenType::CodeIndentedPrefixWhitespace +                | TokenType::CodeTextSequence +                | TokenType::Content                  | TokenType::HardBreakEscapeMarker                  | TokenType::HardBreakTrailingSpace -                | TokenType::Autolink -                | TokenType::AutolinkMarker => {} -                TokenType::HtmlFlow | TokenType::HtmlText => { -                    ignore_encode = false; +                | TokenType::ThematicBreakSequence +                | TokenType::ThematicBreakWhitespace +                | TokenType::Whitespace => { +                    // Ignore.                  } -                TokenType::HtmlFlowData | TokenType::HtmlTextData => { -                    let slice = serialize(codes, &from_exit_event(events, index), false); - -                    let res = if ignore_encode { slice } else { encode(&slice) }; - +                // Just output it. +                TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {                      // last_was_tag = false; -                    buf_tail_mut(buffers).push(res); -                } -                TokenType::Paragraph => { -                    buf_tail_mut(buffers).push("</p>".to_string()); -                } -                TokenType::HardBreakEscape | TokenType::HardBreakTrailing => { -                    buf_tail_mut(buffers).push("<br />".to_string()); -                } -                TokenType::CodeIndented | TokenType::CodeFenced => { -                    let seen_data = -                        code_flow_seen_data.expect("`code_flow_seen_data` must be defined"); - -                    // To do: containers. -                    // One special case is if we are inside a container, and the fenced code was -                    // not closed (meaning it runs to the end). -                    // In that case, the following line ending, is considered *outside* the -                    // fenced code and block quote by micromark, but CM wants to treat that -                    // ending as part of the code. -                    // if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag { -                    //     line_ending(); -                    // } - -                    // But in most cases, it’s simpler: when we’ve seen some data, emit an extra -                    // line ending when needed. -                    if seen_data { -                        line_ending_if_needed(buffers); -                    } - -                    buf_tail_mut(buffers).push("</code></pre>".to_string()); - -                    if let Some(count) = code_fenced_fences_count { -                        if count < 2 { -                            line_ending_if_needed(buffers); -                        } -                    } - -                    code_flow_seen_data = None; -                    code_fenced_fences_count = None; -                    slurp_one_line_ending = false; -                } -                TokenType::CodeFencedFence => { -                    let count = if let Some(count) = code_fenced_fences_count { -                        count -                    } else { -                        0 -                    }; - -                    if count == 0 { -                        buf_tail_mut(buffers).push(">".to_string()); -                        // tag = true; -                        slurp_one_line_ending = true; -                    } - -                    code_fenced_fences_count = Some(count + 1); -                } -                TokenType::CodeFencedFenceInfo => { -                    let value = resume(buffers); -                    buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value)); -                    // tag = true; -                } -                TokenType::CodeFencedFenceMeta => { -                    resume(buffers); -                } -                TokenType::CodeFlowChunk => { -                    code_flow_seen_data = Some(true);                      buf_tail_mut(buffers).push(encode(&serialize(                          codes,                          &from_exit_event(events, index),                          false,                      )));                  } - +                TokenType::AtxHeading => { +                    let rank = atx_opening_sequence_size +                        .expect("`atx_opening_sequence_size` must be set in headings"); +                    buf_tail_mut(buffers).push(format!("</h{}>", rank)); +                    atx_opening_sequence_size = None; +                    atx_heading_buffer = None; +                }                  // `AtxHeadingWhitespace` is ignored after the opening sequence,                  // before the closing sequence, and after the closing sequence.                  // But it is used around intermediate sequences.                  // `atx_heading_buffer` is set to `Some` by the first `AtxHeadingText`.                  // `AtxHeadingSequence` is ignored as the opening and closing sequence,                  // but not when intermediate. -                TokenType::AtxHeadingWhitespace | TokenType::AtxHeadingSequence => { +                TokenType::AtxHeadingSequence | TokenType::AtxHeadingWhitespace => {                      if let Some(buf) = atx_heading_buffer {                          atx_heading_buffer = Some(                              buf.to_string() @@ -320,50 +259,26 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                      buf_tail_mut(buffers).push(encode(&result));                  } -                TokenType::AtxHeading => { -                    let rank = atx_opening_sequence_size -                        .expect("`atx_opening_sequence_size` must be set in headings"); -                    buf_tail_mut(buffers).push(format!("</h{}>", rank)); -                    atx_opening_sequence_size = None; -                    atx_heading_buffer = None; -                } -                TokenType::AutolinkProtocol => { +                TokenType::AutolinkEmail => {                      let slice = serialize(codes, &from_exit_event(events, index), false);                      let buf = buf_tail_mut(buffers);                      buf.push(format!( -                        "<a href=\"{}\">", +                        "<a href=\"mailto:{}\">",                          sanitize_uri(slice.as_str(), &protocol_href)                      ));                      buf.push(encode(&slice));                      buf.push("</a>".to_string());                  } -                TokenType::AutolinkEmail => { +                TokenType::AutolinkProtocol => {                      let slice = serialize(codes, &from_exit_event(events, index), false);                      let buf = buf_tail_mut(buffers);                      buf.push(format!( -                        "<a href=\"mailto:{}\">", +                        "<a href=\"{}\">",                          sanitize_uri(slice.as_str(), &protocol_href)                      ));                      buf.push(encode(&slice));                      buf.push("</a>".to_string());                  } -                TokenType::ThematicBreak => { -                    buf_tail_mut(buffers).push("<hr />".to_string()); -                } -                TokenType::LineEnding => { -                    // if slurp_all_line_endings { -                    //     // Empty. -                    // } else -                    if slurp_one_line_ending { -                        slurp_one_line_ending = false; -                    } else { -                        buf_tail_mut(buffers).push(encode(&serialize( -                            codes, -                            &from_exit_event(events, index), -                            false, -                        ))); -                    } -                }                  TokenType::CharacterReferenceMarker => {                      character_reference_kind = Some(CharacterReferenceKind::Named);                  } @@ -389,9 +304,71 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                      };                      buf_tail_mut(buffers).push(encode(&value)); -                      character_reference_kind = None;                  } +                TokenType::CodeFenced | TokenType::CodeIndented => { +                    let seen_data = +                        code_flow_seen_data.expect("`code_flow_seen_data` must be defined"); + +                    // To do: containers. +                    // One special case is if we are inside a container, and the fenced code was +                    // not closed (meaning it runs to the end). +                    // In that case, the following line ending, is considered *outside* the +                    // fenced code and block quote by micromark, but CM wants to treat that +                    // ending as part of the code. +                    // if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag { +                    //     line_ending(); +                    // } + +                    // But in most cases, it’s simpler: when we’ve seen some data, emit an extra +                    // line ending when needed. +                    if seen_data { +                        line_ending_if_needed(buffers); +                    } + +                    buf_tail_mut(buffers).push("</code></pre>".to_string()); + +                    if let Some(count) = code_fenced_fences_count { +                        if count < 2 { +                            line_ending_if_needed(buffers); +                        } +                    } + +                    code_flow_seen_data = None; +                    code_fenced_fences_count = None; +                    slurp_one_line_ending = false; +                } +                TokenType::CodeFencedFence => { +                    let count = if let Some(count) = code_fenced_fences_count { +                        count +                    } else { +                        0 +                    }; + +                    if count == 0 { +                        buf_tail_mut(buffers).push(">".to_string()); +                        // tag = true; +                        slurp_one_line_ending = true; +                    } + +                    code_fenced_fences_count = Some(count + 1); +                } +                TokenType::CodeFencedFenceInfo => { +                    let value = resume(buffers); +                    buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value)); +                    // tag = true; +                } +                TokenType::CodeFencedFenceMeta => { +                    resume(buffers); +                } +                TokenType::CodeFlowChunk => { +                    code_flow_seen_data = Some(true); +                    buf_tail_mut(buffers).push(encode(&serialize( +                        codes, +                        &from_exit_event(events, index), +                        false, +                    ))); +                }                  TokenType::CodeText => {                      let result = resume(buffers);                      let mut chars = result.chars(); @@ -417,14 +394,38 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  TokenType::CodeTextLineEnding => {                      buf_tail_mut(buffers).push(" ".to_string());                  } -                // This branch below currently acts as the resulting `data` tokens. -                TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => { + +                TokenType::HardBreakEscape | TokenType::HardBreakTrailing => { +                    buf_tail_mut(buffers).push("<br />".to_string()); +                } + +                TokenType::HtmlFlow | TokenType::HtmlText => { +                    ignore_encode = false; +                } +                TokenType::HtmlFlowData | TokenType::HtmlTextData => { +                    let slice = serialize(codes, &from_exit_event(events, index), false);                      // last_was_tag = false; -                    buf_tail_mut(buffers).push(encode(&serialize( -                        codes, -                        &from_exit_event(events, index), -                        false, -                    ))); +                    buf_tail_mut(buffers).push(if ignore_encode { slice } else { encode(&slice) }); +                } +                TokenType::LineEnding => { +                    // if slurp_all_line_endings { +                    //     // Empty. +                    // } else +                    if slurp_one_line_ending { +                        slurp_one_line_ending = false; +                    } else { +                        buf_tail_mut(buffers).push(encode(&serialize( +                            codes, +                            &from_exit_event(events, index), +                            false, +                        ))); +                    } +                } +                TokenType::Paragraph => { +                    buf_tail_mut(buffers).push("</p>".to_string()); +                } +                TokenType::ThematicBreak => { +                    buf_tail_mut(buffers).push("<hr />".to_string());                  }                  #[allow(unreachable_patterns)]                  _ => { diff --git a/src/content/string.rs b/src/content/string.rs index 2723785..25d8582 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -55,6 +55,7 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code { +        // To do: line endings.          Code::None => {              tokenizer.exit(TokenType::Data);              (State::Ok, None) diff --git a/src/content/text.rs b/src/content/text.rs index f61b390..6a30d4c 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -11,7 +11,6 @@  //! *   [Hard break (escape)][crate::construct::hard_break_escape]  //! *   [Hard break (trailing)][crate::construct::hard_break_trailing]  //! *   [Code (text)][crate::construct::code_text] -//! *   Line ending  //! *   Label start (image)  //! *   Label start (link)  //! *   [Character escape][crate::construct::character_escape] diff --git a/src/tokenizer.rs b/src/tokenizer.rs index da45ee5..0aae480 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -24,66 +24,52 @@ pub enum TokenType {      AutolinkMarker,      AutolinkProtocol,      AutolinkEmail, -      AtxHeading,      AtxHeadingSequence,      AtxHeadingWhitespace,      AtxHeadingText, - +    BlankLineEnding, +    BlankLineWhitespace,      CharacterEscape,      CharacterEscapeMarker,      CharacterEscapeValue, -      CharacterReference,      CharacterReferenceMarker,      CharacterReferenceMarkerNumeric,      CharacterReferenceMarkerHexadecimal,      CharacterReferenceMarkerSemi,      CharacterReferenceValue, -      CodeFenced,      CodeFencedFence,      CodeFencedFenceSequence,      CodeFencedFenceWhitespace,      CodeFencedFenceInfo,      CodeFencedFenceMeta, - +    CodeFlowChunk,      CodeIndented,      CodeIndentedPrefixWhitespace, -      CodeText,      CodeTextSequence,      CodeTextLineEnding,      CodeTextData, - -    CodeFlowChunk, - +    Content,      Data, -      HardBreakEscape,      HardBreakEscapeMarker,      HardBreakTrailing,      HardBreakTrailingSpace, -      HtmlFlow,      HtmlFlowData, -      HtmlText,      HtmlTextData, - +    LineEnding, +    Paragraph,      ThematicBreak,      ThematicBreakSequence,      ThematicBreakWhitespace, -      Whitespace, -    LineEnding, -    BlankLineEnding, -    BlankLineWhitespace, - -    Content, - -    Paragraph, +    // Chunks are tokenizer, but unraveled by `subtokenize`.      ChunkContent,      ChunkString,      ChunkText, | 
