diff options
Diffstat (limited to '')
| -rw-r--r-- | src/compiler.rs | 82 | ||||
| -rw-r--r-- | src/constant.rs | 4 | ||||
| -rw-r--r-- | src/construct/list.rs | 195 | ||||
| -rw-r--r-- | src/construct/mod.rs | 4 | ||||
| -rw-r--r-- | src/content/document.rs | 31 | ||||
| -rw-r--r-- | src/token.rs | 7 | 
6 files changed, 308 insertions, 15 deletions
| diff --git a/src/compiler.rs b/src/compiler.rs index 8a28654..753d85f 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -235,6 +235,7 @@ struct CompileContext<'a> {      pub code_flow_seen_data: Option<bool>,      pub code_fenced_fences_count: Option<usize>,      pub character_reference_kind: Option<CharacterReferenceKind>, +    pub expect_first_item: Option<bool>,      pub media_stack: Vec<Media>,      pub definitions: HashMap<String, Definition>,      pub tight_stack: Vec<bool>, @@ -269,6 +270,7 @@ impl<'a> CompileContext<'a> {              code_flow_seen_data: None,              code_fenced_fences_count: None,              character_reference_kind: None, +            expect_first_item: None,              media_stack: vec![],              definitions: HashMap::new(),              tight_stack: vec![], @@ -379,7 +381,7 @@ impl<'a> CompileContext<'a> {  /// Turn events and codes into a string of HTML.  #[allow(clippy::too_many_lines)]  pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { -    // let mut slurp_all_line_endings = false; +    // let slurp_all_line_endings = false;      let mut index = 0;      let mut line_ending_inferred: Option<LineEnding> = None; @@ -443,6 +445,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {      enter_map.insert(Token::ResourceTitleString, on_enter_buffer);      enter_map.insert(Token::Strong, on_enter_strong); +    // To do: sort. +    enter_map.insert(Token::ListItemMarker, on_enter_list_item_marker); +    enter_map.insert(Token::List, on_enter_list); +      let mut exit_map: Map = HashMap::new();      exit_map.insert(Token::AutolinkEmail, on_exit_autolink_email);      exit_map.insert(Token::AutolinkProtocol, on_exit_autolink_protocol); @@ -488,7 +494,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {          on_exit_definition_title_string,      );      exit_map.insert(Token::Emphasis, on_exit_emphasis); -      exit_map.insert(Token::HardBreakEscape, on_exit_break);      exit_map.insert(Token::HardBreakTrailing, on_exit_break);      exit_map.insert(Token::HeadingAtx, on_exit_heading_atx); @@ -519,6 +524,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {      exit_map.insert(Token::Strong, on_exit_strong);      exit_map.insert(Token::ThematicBreak, on_exit_thematic_break); +    // To do: sort. +    exit_map.insert(Token::List, on_exit_list); +    exit_map.insert(Token::ListItemValue, on_exit_list_item_value); +      // Handle one event.      let handle = |context: &mut CompileContext, index: usize| {          let event = &events[index]; @@ -766,7 +775,7 @@ fn on_exit_block_quote(context: &mut CompileContext) {      context.tight_stack.pop();      context.line_ending_if_needed();      context.tag("</blockquote>".to_string()); -    // let mut slurp_all_line_endings = false; +    // context.slurp_all_line_endings = false;  }  /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker]. @@ -1056,7 +1065,7 @@ fn on_exit_label_text(context: &mut CompileContext) {  /// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding].  fn on_exit_line_ending(context: &mut CompileContext) { -    // if slurp_all_line_endings { +    // if context.slurp_all_line_endings {      //     // Empty.      // } else      if context.slurp_one_line_ending { @@ -1176,3 +1185,68 @@ fn on_exit_thematic_break(context: &mut CompileContext) {      context.line_ending_if_needed();      context.tag("<hr />".to_string());  } + +// To do: sort. +/// To do +fn on_enter_list_item_marker(context: &mut CompileContext) { +    let expect_first_item = context.expect_first_item.take().unwrap(); + +    if expect_first_item { +        context.tag(">".to_string()); +    } else { +        on_exit_list_item(context); +    } + +    context.line_ending_if_needed(); +    context.tag("<li>".to_string()); +    context.expect_first_item = Some(false); +    // “Hack” to prevent a line ending from showing up if the item is empty. +    context.last_was_tag = false; +} + +/// To do (onenterlist{un,}ordered) +fn on_enter_list(context: &mut CompileContext) { +    // To do: !token._loose +    context.tight_stack.push(false); +    context.line_ending_if_needed(); +    context.tag("<ol".to_string()); // To do: `ol` / `ul`. +    context.expect_first_item = Some(true); +} + +/// To do +fn on_exit_list_item_value(context: &mut CompileContext) { +    let expect_first_item = context.expect_first_item.unwrap(); + +    if expect_first_item { +        let slice = serialize( +            context.codes, +            &from_exit_event(context.events, context.index), +            false, +        ); +        let value = slice.parse::<u32>().ok().unwrap(); + +        if value != 1 { +            context.tag(format!(" start=\"{}\"", encode(&value.to_string()))); +        } +    } +} + +/// To do. +/// Note: there is no actual `Token::ListItem`. +fn on_exit_list_item(context: &mut CompileContext) { +    //  && !context.slurp_all_line_endings +    if context.last_was_tag { +        context.line_ending_if_needed(); +    } + +    context.tag("</li>".to_string()); // To do: `ol` / `ul`. +    // context.slurp_all_line_endings = false; +} + +/// To do. +fn on_exit_list(context: &mut CompileContext) { +    on_exit_list_item(context); +    context.tight_stack.pop(); +    context.line_ending(); +    context.tag("</ol>".to_string()); // To do: `ol` / `ul`. +} diff --git a/src/constant.rs b/src/constant.rs index fc74316..5dd2435 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -193,6 +193,10 @@ pub const HTML_RAW_SIZE_MAX: usize = 8;  /// To safeguard performance, labels are capped at a large number: `999`.  pub const LINK_REFERENCE_SIZE_MAX: usize = 999; +/// To do. +/// See: <https://spec.commonmark.org/0.30/#ordered-list-marker> +pub const LIST_ITEM_VALUE_SIZE_MAX: usize = 10; +  /// Maximum allowed unbalanced parens in destination.  ///  /// There can be many balanced parens, but if there are 33 opens that were not diff --git a/src/construct/list.rs b/src/construct/list.rs new file mode 100644 index 0000000..96b2496 --- /dev/null +++ b/src/construct/list.rs @@ -0,0 +1,195 @@ +//! To do. + +use crate::constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE}; +use crate::construct::partial_space_or_tab::space_or_tab_min_max; +use crate::token::Token; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +/// Type of title. +#[derive(Debug, PartialEq)] +enum Kind { +    /// In a dot (`.`) list. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// 1. a +    /// ``` +    Dot, +    /// In a paren (`)`) list. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// 1) a +    /// ``` +    Paren, +    /// In an asterisk (`*`) list. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// * a +    /// ``` +    Asterisk, +    /// In a plus (`+`) list. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// + a +    /// ``` +    Plus, +    /// In a dash (`-`) list. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// - a +    /// ``` +    Dash, +} + +impl Kind { +    /// Turn the kind into a [char]. +    fn as_char(&self) -> char { +        match self { +            Kind::Dot => '.', +            Kind::Paren => ')', +            Kind::Asterisk => '*', +            Kind::Plus => '+', +            Kind::Dash => '-', +        } +    } +    /// Turn a [char] into a kind. +    /// +    /// ## Panics +    /// +    /// Panics if `char` is not `.`, `)`, `*`, `+`, or `-`. +    fn from_char(char: char) -> Kind { +        match char { +            '.' => Kind::Dot, +            ')' => Kind::Paren, +            '*' => Kind::Asterisk, +            '+' => Kind::Plus, +            '-' => Kind::Dash, +            _ => unreachable!("invalid char"), +        } +    } +    /// Turn [Code] into a kind. +    /// +    /// ## Panics +    /// +    /// Panics if `code` is not `Code::Char('.' | ')' | '*' | '+' | '-')`. +    fn from_code(code: Code) -> Kind { +        match code { +            Code::Char(char) => Kind::from_char(char), +            _ => unreachable!("invalid code"), +        } +    } +} + +/// To do. +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    // To do: allow arbitrary when code (indented) is turned off. +    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) +} + +/// To do. +fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        // Unordered. +        Code::Char('*' | '+' | '-') => { +            // To do: check if this is a thematic break? +            tokenizer.enter(Token::List); +            tokenizer.enter(Token::ListItemPrefix); +            marker(tokenizer, code) +        } +        // Ordered. +        Code::Char(char) if char.is_ascii_digit() => { +            tokenizer.enter(Token::List); +            tokenizer.enter(Token::ListItemPrefix); +            tokenizer.enter(Token::ListItemValue); +            // To do: `interrupt || !1`? +            inside(tokenizer, code, 0) +        } +        _ => (State::Nok, None), +    } +} + +/// To do. +fn inside(tokenizer: &mut Tokenizer, code: Code, mut size: usize) -> StateFnResult { +    match code { +        Code::Char(char) if char.is_ascii_digit() && size < LIST_ITEM_VALUE_SIZE_MAX => { +            tokenizer.consume(code); +            size += 1; +            (State::Fn(Box::new(move |t, c| inside(t, c, size))), None) +        } +        // To do: `(!self.interrupt || size < 2)` +        Code::Char('.' | ')') => { +            tokenizer.exit(Token::ListItemValue); +            marker(tokenizer, code) +        } +        _ => (State::Nok, None), +    } +} + +/// To do. +fn marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    let kind = Kind::from_code(code); +    println!("list item kind: {:?}", kind); +    tokenizer.enter(Token::ListItemMarker); +    tokenizer.consume(code); +    tokenizer.exit(Token::ListItemMarker); +    // To do: check blank line, if true `State::Nok` else `on_blank`. +    (State::Fn(Box::new(marker_after)), None) +} + +/// To do. +fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    tokenizer.attempt(list_item_prefix_whitespace, |ok| { +        let func = if ok { prefix_end } else { prefix_other }; +        Box::new(func) +    })(tokenizer, code) +} + +// To do: `on_blank`. + +/// To do. +fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::VirtualSpace | Code::Char('\t' | ' ') => { +            tokenizer.enter(Token::SpaceOrTab); +            tokenizer.consume(code); +            tokenizer.exit(Token::SpaceOrTab); +            (State::Fn(Box::new(prefix_end)), None) +        } +        _ => (State::Nok, None), +    } +} + +/// To do. +fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    // To do: calculate size. +    tokenizer.exit(Token::ListItemPrefix); +    (State::Ok, Some(vec![code])) +} + +/// To do. +fn list_item_prefix_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    // To do: check how big this should be? +    tokenizer.go( +        space_or_tab_min_max(1, TAB_SIZE - 1), +        list_item_prefix_whitespace_after, +    )(tokenizer, code) +} + +fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    // To do: check some stuff? +    (State::Ok, Some(vec![code])) +} + +/// End of a block quote. +pub fn end() -> Vec<Token> { +    vec![Token::List] +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 06ff4e9..be9dfe3 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -33,7 +33,7 @@  //! *   [label end][label_end]  //! *   [label start (image)][label_start_image]  //! *   [label start (link)][label_start_link] -//! *   list +//! *   [list][]  //! *   [paragraph][]  //! *   [thematic break][thematic_break]  //! @@ -42,6 +42,7 @@  //! *   [data][partial_data]  //! *   [destination][partial_destination]  //! *   [label][partial_label] +//! *   [non lazy continuation][partial_non_lazy_continuation]  //! *   [space or tab][partial_space_or_tab]  //! *   [title][partial_title]  //! *   [whitespace][partial_whitespace] @@ -80,6 +81,7 @@ pub mod html_text;  pub mod label_end;  pub mod label_start_image;  pub mod label_start_link; +pub mod list;  pub mod paragraph;  pub mod partial_data;  pub mod partial_destination; diff --git a/src/content/document.rs b/src/content/document.rs index a8ff775..e32534e 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -8,8 +8,9 @@  //! *   [Block quote][crate::construct::block_quote]  //! *   List -use crate::construct::block_quote::{ -    cont as block_quote_cont, end as block_quote_end, start as block_quote, +use crate::construct::{ +    block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote}, +    list::{end as list_end, start as list},  };  use crate::content::flow::start as flow;  use crate::parser::ParseState; @@ -96,9 +97,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR      // continuation line.      if info.continued < info.stack.len() {          let name = &info.stack[info.continued]; -        // To do: list.          let cont = if name == "blockquote" {              block_quote_cont +        } else if name == "list" { +            unreachable!("todo: list cont {:?}", name)          } else {              unreachable!("todo: cont construct {:?}", name)          }; @@ -157,12 +159,19 @@ fn check_new_containers(      }      // Check if there is a new container. -    // To do: list.      tokenizer.attempt(block_quote, move |ok| {          if ok {              Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string()))          } else { -            Box::new(|t, c| there_is_no_new_container(t, c, info)) +            Box::new(|tokenizer, code| { +                tokenizer.attempt(list, move |ok| { +                    if ok { +                        Box::new(|t, c| there_is_a_new_container(t, c, info, "list".to_string())) +                    } else { +                        Box::new(|t, c| there_is_no_new_container(t, c, info)) +                    } +                })(tokenizer, code) +            })          }      })(tokenizer, code)  } @@ -179,11 +188,12 @@ fn there_is_a_new_container(      // Remove from the event stack.      // We’ll properly add exits at different points manually. -    // To do: list.      let end = if name == "blockquote" {          block_quote_end +    } else if name == "list" { +        list_end      } else { -        unreachable!("todo: cont {:?}", name) +        unreachable!("todo: end {:?}", name)      };      let token_types = end(); @@ -249,11 +259,12 @@ fn exit_containers(      while info.stack.len() > size {          let name = info.stack.pop().unwrap(); -        // To do: list.          let end = if name == "blockquote" {              block_quote_end +        } else if name == "list" { +            list_end          } else { -            unreachable!("todo: cont {:?}", name) +            unreachable!("todo: end {:?}", name)          };          let token_types = end(); @@ -265,7 +276,7 @@ fn exit_containers(              exits.push(Event {                  event_type: EventType::Exit,                  token_type: token_type.clone(), -                // To do: fix position later. +                // Note: positions are fixed later.                  point: tokenizer.point.clone(),                  index: tokenizer.index,                  previous: None, diff --git a/src/token.rs b/src/token.rs index 9b59719..f60f9cd 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1765,4 +1765,11 @@ pub enum Token {      ///     ^ ^ ^      /// ```      ThematicBreakSequence, + +    // To do: sort. +    List, +    ListItemPrefix, +    ListItemValue, +    ListItemMarker, +    // ListItemPrefixSpaceOrTab,  } | 
