| | |
|---|---|
| author | 2022-07-04 12:16:51 +0200 |
| committer | 2022-07-04 12:16:58 +0200 |
| commit | faca28020f4894bdfcf5a4b164ebbc75864d8776 (patch) |
| tree | 93377413ae8c355e2d804f7e700241693b228e70 /src |
| parent | e1cae8c705e66669d043f5269e9f58c09c7b0eaa (diff) |
Add support for attention (emphasis, strong)
Diffstat (limited to 'src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/compiler.rs | 24 |
| -rw-r--r-- | src/construct/attention.rs | 401 |
| -rw-r--r-- | src/construct/mod.rs | 3 |
| -rw-r--r-- | src/construct/partial_label.rs | 5 |
| -rw-r--r-- | src/content/text.rs | 18 |
| -rw-r--r-- | src/tokenizer.rs | 9 |
6 files changed, 449 insertions, 11 deletions
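
The heart of the patch is the resolver in `src/construct/attention.rs`: it classifies the characters on either side of each `*`/`_` run as whitespace, punctuation, or other, derives whether the run can open and/or close, and then pairs openers with closers (including CommonMark's "multiple of three" restriction). The snippet below is a minimal, standalone sketch of that classification step only; the names `Kind`, `classify`, `can_open`, and `can_close` are illustrative and not part of the crate, and the real construct works on `Code` values rather than `char`:

```rust
#[derive(PartialEq)]
enum Kind {
    Whitespace,
    Punctuation,
    Other,
}

// Classify the character before/after a marker run; `None` (start or end of
// input) counts as whitespace, mirroring `classify_character` in the patch.
fn classify(c: Option<char>) -> Kind {
    match c {
        None => Kind::Whitespace,
        Some(ch) if ch.is_whitespace() => Kind::Whitespace,
        // The patch only checks ASCII punctuation for now (see its to-do note).
        Some(ch) if ch.is_ascii_punctuation() => Kind::Punctuation,
        Some(_) => Kind::Other,
    }
}

fn main() {
    // The `*` run in `a*b` is flanked by "other" text on both sides,
    // so it may both open and close emphasis.
    let before = classify(Some('a'));
    let after = classify(Some('b'));
    let can_open = after == Kind::Other
        || (after == Kind::Punctuation && before != Kind::Other);
    let can_close = before == Kind::Other
        || (before == Kind::Punctuation && after != Kind::Other);
    println!("open: {can_open}, close: {can_close}"); // open: true, close: true
}
```

For `_`, the patch additionally requires the flanking side not to be "other" text, so words like `snake_case` do not become emphasis. The full patch follows.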

```diff
diff --git a/src/compiler.rs b/src/compiler.rs
index 1f16648..061d3e3 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -421,6 +421,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
     enter_map.insert(TokenType::CodeIndented, on_enter_code_indented);
     enter_map.insert(TokenType::CodeFenced, on_enter_code_fenced);
     enter_map.insert(TokenType::CodeText, on_enter_code_text);
+    enter_map.insert(TokenType::Emphasis, on_enter_emphasis);
     enter_map.insert(TokenType::HtmlFlow, on_enter_html_flow);
     enter_map.insert(TokenType::HtmlText, on_enter_html_text);
     enter_map.insert(TokenType::Image, on_enter_image);
@@ -431,6 +432,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
         on_enter_resource_destination_string,
     );
     enter_map.insert(TokenType::Paragraph, on_enter_paragraph);
+    enter_map.insert(TokenType::Strong, on_enter_strong);
     enter_map.insert(TokenType::Definition, on_enter_definition);
     enter_map.insert(
         TokenType::DefinitionDestinationString,
@@ -441,6 +443,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
     enter_map.insert(TokenType::DefinitionTitleString, on_enter_buffer);
 
     let mut exit_map: Map = HashMap::new();
+    exit_map.insert(TokenType::Emphasis, on_exit_emphasis);
     exit_map.insert(TokenType::Label, on_exit_label);
     exit_map.insert(TokenType::LabelText, on_exit_label_text);
     exit_map.insert(TokenType::ReferenceString, on_exit_reference_string);
@@ -452,6 +455,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
         TokenType::ResourceTitleString,
         on_exit_resource_title_string,
     );
+    exit_map.insert(TokenType::Strong, on_exit_strong);
     exit_map.insert(TokenType::Image, on_exit_media);
     exit_map.insert(TokenType::Link, on_exit_media);
     exit_map.insert(TokenType::CodeTextData, on_exit_data);
@@ -644,6 +648,11 @@ fn on_enter_definition_destination_string(context: &mut CompileContext) {
     context.ignore_encode = true;
 }
 
+/// Handle [`Enter`][EventType::Enter]:[`Emphasis`][TokenType::Emphasis].
+fn on_enter_emphasis(context: &mut CompileContext) {
+    context.tag("<em>".to_string());
+}
+
 /// Handle [`Enter`][EventType::Enter]:[`HtmlFlow`][TokenType::HtmlFlow].
 fn on_enter_html_flow(context: &mut CompileContext) {
     context.line_ending_if_needed();
@@ -704,6 +713,11 @@ fn on_enter_resource_destination_string(context: &mut CompileContext) {
     context.ignore_encode = true;
 }
 
+/// Handle [`Enter`][EventType::Enter]:[`Strong`][TokenType::Strong].
+fn on_enter_strong(context: &mut CompileContext) {
+    context.tag("<strong>".to_string());
+}
+
 /// Handle [`Exit`][EventType::Exit]:[`AutolinkEmail`][TokenType::AutolinkEmail].
 fn on_exit_autolink_email(context: &mut CompileContext) {
     let slice = serialize(
@@ -933,6 +947,11 @@ fn on_exit_definition_title_string(context: &mut CompileContext) {
     definition.title = Some(buf);
 }
 
+/// Handle [`Exit`][EventType::Exit]:[`Emphasis`][TokenType::Emphasis].
+fn on_exit_emphasis(context: &mut CompileContext) {
+    context.tag("</em>".to_string());
+}
+
 /// Handle [`Exit`][EventType::Exit]:[`HeadingAtx`][TokenType::HeadingAtx].
 fn on_exit_heading_atx(context: &mut CompileContext) {
     let rank = context
@@ -1132,6 +1151,11 @@ fn on_exit_resource_title_string(context: &mut CompileContext) {
     media.title = Some(buf);
 }
 
+/// Handle [`Exit`][EventType::Exit]:[`Strong`][TokenType::Strong].
+fn on_exit_strong(context: &mut CompileContext) {
+    context.tag("</strong>".to_string());
+}
+
 /// Handle [`Exit`][EventType::Exit]:[`ThematicBreak`][TokenType::ThematicBreak].
 fn on_exit_thematic_break(context: &mut CompileContext) {
     context.tag("<hr />".to_string());
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
new file mode 100644
index 0000000..f022e6e
--- /dev/null
+++ b/src/construct/attention.rs
@@ -0,0 +1,401 @@
+//! To do.
+
+use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::edit_map::EditMap;
+
+/// To do
+#[derive(Debug, PartialEq)]
+enum GroupKind {
+    Whitespace,
+    Punctuation,
+    Other,
+}
+
+/// To do
+#[derive(Debug, PartialEq)]
+enum MarkerKind {
+    Asterisk,
+    Underscore,
+}
+
+impl MarkerKind {
+    fn from_char(char: char) -> MarkerKind {
+        match char {
+            '*' => MarkerKind::Asterisk,
+            '_' => MarkerKind::Underscore,
+            _ => unreachable!("invalid char"),
+        }
+    }
+    fn from_code(code: Code) -> MarkerKind {
+        match code {
+            Code::Char(char) => MarkerKind::from_char(char),
+            _ => unreachable!("invalid code"),
+        }
+    }
+}
+
+/// To do
+#[derive(Debug)]
+struct Run {
+    marker: MarkerKind,
+    event_index: usize,
+    start_point: Point,
+    start_index: usize,
+    end_point: Point,
+    end_index: usize,
+    size: usize,
+    open: bool,
+    close: bool,
+}
+
+/// Before a paragraph.
+///
+/// ```markdown
+/// |qwe
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::Char(char) if char == '*' || char == '_' => {
+            tokenizer.enter(TokenType::AttentionSequence);
+            inside(tokenizer, code, char)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In a paragraph.
+///
+/// ```markdown
+/// al|pha
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code, marker: char) -> StateFnResult {
+    match code {
+        Code::Char(char) if char == marker => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(move |t, c| inside(t, c, marker))), None)
+        }
+        _ => {
+            tokenizer.exit(TokenType::AttentionSequence);
+            tokenizer.register_resolver("attention".to_string(), Box::new(resolve));
+            (State::Ok, Some(vec![code]))
+        }
+    }
+}
+
+/// To do.
+#[allow(clippy::too_many_lines)]
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+    let mut index = 0;
+    println!("before: {:?}", tokenizer.events.len());
+    while index < tokenizer.events.len() {
+        let event = &tokenizer.events[index];
+        println!(
+            "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+            index,
+            event.event_type,
+            event.token_type,
+            event.content_type,
+            event.previous,
+            event.next
+        );
+        index += 1;
+    }
+
+    let codes = &tokenizer.parse_state.codes;
+    let mut edit_map = EditMap::new();
+    let mut start = 0;
+    let mut runs: Vec<Run> = vec![];
+
+    // Find runs of sequences and information about them.
+    while start < tokenizer.events.len() {
+        let enter = &tokenizer.events[start];
+
+        if enter.event_type == EventType::Enter && enter.token_type == TokenType::AttentionSequence
+        {
+            let end = start + 1;
+            let exit = &tokenizer.events[end];
+            let marker = MarkerKind::from_code(codes[enter.index]);
+            let before = classify_character(if enter.index > 0 {
+                codes[enter.index - 1]
+            } else {
+                Code::None
+            });
+            let after = classify_character(if exit.index < codes.len() {
+                codes[exit.index]
+            } else {
+                Code::None
+            });
+            let open = after == GroupKind::Other
+                || (after == GroupKind::Punctuation && before != GroupKind::Other);
+            // To do: GFM strikethrough?
+            // || attentionMarkers.includes(code)
+            let close = before == GroupKind::Other
+                || (before == GroupKind::Punctuation && after != GroupKind::Other);
+            // To do: GFM strikethrough?
+            // || attentionMarkers.includes(previous)
+
+            runs.push(Run {
+                event_index: start,
+                start_point: enter.point.clone(),
+                start_index: enter.index,
+                end_point: exit.point.clone(),
+                end_index: exit.index,
+                size: exit.index - enter.index,
+                open: if marker == MarkerKind::Asterisk {
+                    open
+                } else {
+                    open && (before != GroupKind::Other || !close)
+                },
+                close: if marker == MarkerKind::Asterisk {
+                    close
+                } else {
+                    close && (after != GroupKind::Other || !open)
+                },
+                marker,
+            });
+
+            start += 1;
+        }
+
+        start += 1;
+    }
+
+    // Walk through runs and match them.
+    let mut close = 0;
+
+    while close < runs.len() {
+        let run_close = &runs[close];
+
+        // Find a run that can close.
+        if run_close.close {
+            let mut open = close;
+
+            // Now walk back to find an opener.
+            while open > 0 {
+                open -= 1;
+
+                let run_open = &runs[open];
+
+                // Find a token that can open the closer.
+                if run_open.open && run_close.marker == run_open.marker {
+                    // If the opening can close or the closing can open,
+                    // and the close size *is not* a multiple of three,
+                    // but the sum of the opening and closing size *is*
+                    // multiple of three, then **don’t** match.
+                    if (run_open.close || run_close.open)
+                        && run_close.size % 3 != 0
+                        && (run_open.size + run_close.size) % 3 == 0
+                    {
+                        continue;
+                    }
+
+                    // Number of markers to use from the sequence.
+                    let take = if run_open.size > 1 && run_close.size > 1 {
+                        2
+                    } else {
+                        1
+                    };
+
+                    let run_close = &mut runs[close];
+                    let close_event_index = run_close.event_index;
+                    let seq_close_enter = (run_close.start_point.clone(), run_close.start_index);
+                    run_close.size -= take;
+                    run_close.start_point.column += take;
+                    run_close.start_point.offset += take;
+                    let seq_close_exit = (run_close.start_point.clone(), run_close.start_index);
+
+                    // Remove closing run if fully used.
+                    if run_close.size == 0 {
+                        runs.remove(close);
+                        edit_map.add(close_event_index, 2, vec![]);
+                    }
+
+                    let run_open = &mut runs[open];
+                    let open_event_index = run_open.event_index;
+                    let seq_open_exit = (run_open.end_point.clone(), run_open.end_index);
+                    run_open.size -= take;
+                    run_open.end_point.column -= take;
+                    run_open.end_point.offset -= take;
+                    let seq_open_enter = (run_open.end_point.clone(), run_open.end_index);
+
+                    // Remove opening run if fully used.
+                    if run_open.size == 0 {
+                        runs.remove(open);
+                        edit_map.add(open_event_index, 2, vec![]);
+                    }
+
+                    // Opening.
+                    edit_map.add(
+                        open_event_index,
+                        0,
+                        vec![
+                            Event {
+                                event_type: EventType::Enter,
+                                token_type: if take == 1 {
+                                    TokenType::Emphasis
+                                } else {
+                                    TokenType::Strong
+                                },
+                                point: seq_open_enter.0.clone(),
+                                index: seq_open_enter.1,
+                                previous: None,
+                                next: None,
+                                content_type: None,
+                            },
+                            Event {
+                                event_type: EventType::Enter,
+                                token_type: if take == 1 {
+                                    TokenType::EmphasisSequence
+                                } else {
+                                    TokenType::StrongSequence
+                                },
+                                point: seq_open_enter.0.clone(),
+                                index: seq_open_enter.1,
+                                previous: None,
+                                next: None,
+                                content_type: None,
+                            },
+                            Event {
+                                event_type: EventType::Exit,
+                                token_type: if take == 1 {
+                                    TokenType::EmphasisSequence
+                                } else {
+                                    TokenType::StrongSequence
+                                },
+                                point: seq_open_exit.0.clone(),
+                                index: seq_open_exit.1,
+                                previous: None,
+                                next: None,
+                                content_type: None,
+                            },
+                            Event {
+                                event_type: EventType::Enter,
+                                token_type: if take == 1 {
+                                    TokenType::EmphasisText
+                                } else {
+                                    TokenType::StrongText
+                                },
+                                point: seq_open_exit.0.clone(),
+                                index: seq_open_exit.1,
+                                previous: None,
+                                next: None,
+                                content_type: None,
+                            },
+                        ],
+                    );
+                    // Closing.
+                    edit_map.add(
+                        close_event_index,
+                        0,
+                        vec![
+                            Event {
+                                event_type: EventType::Exit,
+                                token_type: if take == 1 {
+                                    TokenType::EmphasisText
+                                } else {
+                                    TokenType::StrongText
+                                },
+                                point: seq_close_enter.0.clone(),
+                                index: seq_close_enter.1,
+                                previous: None,
+                                next: None,
+                                content_type: None,
+                            },
+                            Event {
+                                event_type: EventType::Enter,
+                                token_type: if take == 1 {
+                                    TokenType::EmphasisSequence
+                                } else {
+                                    TokenType::StrongSequence
+                                },
+                                point: seq_close_enter.0.clone(),
+                                index: seq_close_enter.1,
+                                previous: None,
+                                next: None,
+                                content_type: None,
+                            },
+                            Event {
+                                event_type: EventType::Exit,
+                                token_type: if take == 1 {
+                                    TokenType::EmphasisSequence
+                                } else {
+                                    TokenType::StrongSequence
+                                },
+                                point: seq_close_exit.0.clone(),
+                                index: seq_close_exit.1,
+                                previous: None,
+                                next: None,
+                                content_type: None,
+                            },
+                            Event {
+                                event_type: EventType::Exit,
+                                token_type: if take == 1 {
+                                    TokenType::Emphasis
+                                } else {
+                                    TokenType::Strong
+                                },
+                                point: seq_close_exit.0.clone(),
+                                index: seq_close_exit.1,
+                                previous: None,
+                                next: None,
+                                content_type: None,
+                            },
+                        ],
+                    );
+
+                    break;
+                }
+            }
+        }
+
+        close += 1;
+    }
+
+    // Mark remaining sequences as data.
+    let mut index = 0;
+    while index < runs.len() {
+        let run = &runs[index];
+        // To do: resize!
+        tokenizer.events[run.event_index].token_type = TokenType::Data;
+        tokenizer.events[run.event_index + 1].token_type = TokenType::Data;
+
+        index += 1;
+    }
+
+    let events = edit_map.consume(&mut tokenizer.events);
+    let mut index = 0;
+    println!("after: {:?}", events.len());
+    while index < events.len() {
+        let event = &events[index];
+        println!(
+            "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+            index,
+            event.event_type,
+            event.token_type,
+            event.content_type,
+            event.previous,
+            event.next
+        );
+        index += 1;
+    }
+
+    events
+}
+
+fn classify_character(code: Code) -> GroupKind {
+    match code {
+        // Markdown whitespace.
+        Code::None
+        | Code::CarriageReturnLineFeed
+        | Code::VirtualSpace
+        | Code::Char('\t' | '\r' | '\n' | ' ') => GroupKind::Whitespace,
+        // Unicode whitespace.
+        Code::Char(char) if char.is_whitespace() => GroupKind::Whitespace,
+        // Unicode punctuation.
+        // To do: `is_punctuation` is not in rust? Why not?
+        // Perhaps we need to generate stuff just like:
+        // <https://github.com/micromark/micromark/blob/main/packages/micromark-util-character/dev/lib/unicode-punctuation-regex.js>.
+        Code::Char(char) if char.is_ascii_punctuation() => GroupKind::Punctuation,
+        Code::Char(_) => GroupKind::Other,
+    }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 9e3dfb0..66b2a3c 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -14,7 +14,7 @@
 //!
 //! The following constructs are found in markdown:
 //!
-//! *   attention (strong, emphasis)
+//! *   [attention (strong, emphasis)][attention]
 //! *   [autolink][]
 //! *   [blank line][blank_line]
 //! *   block quote
@@ -61,6 +61,7 @@
 //! example `ascii_punctuation` refers to
 //! [`char::is_ascii_punctuation`][char::is_ascii_punctuation].
 
+pub mod attention;
 pub mod autolink;
 pub mod blank_line;
 pub mod character_escape;
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index e505997..32182d6 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -41,7 +41,7 @@
 //! > ([label start (image)][label_start_image] or
 //! > [label start (link)][label_start_link]) and a closing
 //! > ([label end][label_end]), so as to allow further phrasing such as
-//! > [code (text)][code_text] or attention.
+//! > [code (text)][code_text] or [attention][].
 //!
 //! ## References
 //!
@@ -49,6 +49,7 @@
 //!
 //! [definition]: crate::construct::definition
 //! [string]: crate::content::string
+//! [attention]: crate::construct::attention
 //! [character_escape]: crate::construct::character_escape
 //! [character_reference]: crate::construct::character_reference
 //! [label_start_image]: crate::construct::label_start_image
@@ -56,8 +57,6 @@
 //! [label_end]: crate::construct::label_end
 //! [code_text]: crate::construct::code_text
 //! [link_reference_size_max]: crate::constant::LINK_REFERENCE_SIZE_MAX
-//!
-//! <!-- To do: link attention. -->
 
 use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
 use crate::constant::LINK_REFERENCE_SIZE_MAX;
diff --git a/src/content/text.rs b/src/content/text.rs
index c3f4e1b..ecb6ae1 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -1,12 +1,13 @@
 //! The text content type.
 //!
-//! **Text** contains phrasing content such as attention (emphasis, strong),
-//! media (links, images), and actual text.
+//! **Text** contains phrasing content such as
+//! [attention][crate::construct::attention] (emphasis, strong),
+//! [code (text)][crate::construct::code_text], and actual text.
 //!
 //! The constructs found in text are:
 //!
+//! *   [Attention][crate::construct::attention]
 //! *   [Autolink][crate::construct::autolink]
-//! *   Attention
 //! *   [HTML (text)][crate::construct::html_text]
 //! *   [Hard break (escape)][crate::construct::hard_break_escape]
 //! *   [Hard break (trailing)][crate::construct::hard_break_trailing]
@@ -18,9 +19,9 @@
 //! *   [Character reference][crate::construct::character_reference]
 
 use crate::construct::{
-    autolink::start as autolink, character_escape::start as character_escape,
-    character_reference::start as character_reference, code_text::start as code_text,
-    hard_break_escape::start as hard_break_escape,
+    attention::start as attention, autolink::start as autolink,
+    character_escape::start as character_escape, character_reference::start as character_reference,
+    code_text::start as code_text, hard_break_escape::start as hard_break_escape,
     hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,
     label_end::start as label_end, label_start_image::start as label_start_image,
     label_start_link::start as label_start_link, partial_data::start as data,
@@ -28,16 +29,18 @@ use crate::construct::{
 };
 use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
 
-const MARKERS: [Code; 10] = [
+const MARKERS: [Code; 12] = [
     Code::VirtualSpace, // `whitespace`
     Code::Char('\t'),   // `whitespace`
     Code::Char(' '),    // `hard_break_trailing`, `whitespace`
     Code::Char('!'),    // `label_start_image`
     Code::Char('&'),    // `character_reference`
+    Code::Char('*'),    // `attention`
     Code::Char('<'),    // `autolink`, `html_text`
     Code::Char('['),    // `label_start_link`
     Code::Char('\\'),   // `character_escape`, `hard_break_escape`
     Code::Char(']'),    // `label_end`
+    Code::Char('_'),    // `attention`
     Code::Char('`'),    // `code_text`
 ];
 
@@ -55,6 +58,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         Code::None => (State::Ok, None),
         _ => tokenizer.attempt_n(
             vec
```