From b33a81e40620b8b3eaeeec9d0e0b34ca5958dead Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Wed, 28 Sep 2022 17:54:39 +0200 Subject: Add support for turning mdast to hast --- tests/test_utils/hast.rs | 279 +++++++++ tests/test_utils/mod.rs | 248 +------- tests/test_utils/swc.rs | 247 ++++++++ tests/test_utils/to_hast.rs | 1457 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1986 insertions(+), 245 deletions(-) create mode 100644 tests/test_utils/hast.rs create mode 100644 tests/test_utils/swc.rs create mode 100644 tests/test_utils/to_hast.rs (limited to 'tests/test_utils') diff --git a/tests/test_utils/hast.rs b/tests/test_utils/hast.rs new file mode 100644 index 0000000..4adf0ca --- /dev/null +++ b/tests/test_utils/hast.rs @@ -0,0 +1,279 @@ +#![allow(dead_code)] + +// ^-- fix later + +extern crate alloc; +extern crate micromark; +use alloc::{ + fmt, + string::{String, ToString}, + vec::Vec, +}; +use micromark::{mdast::AttributeContent, unist::Position}; + +/// Nodes. +#[derive(Clone, PartialEq)] +pub enum Node { + /// Root. + Root(Root), + /// Element. + Element(Element), + /// Document type. + Doctype(Doctype), + /// Comment. + Comment(Comment), + /// Text. + Text(Text), + + // MDX being passed through. + /// MDX: JSX element. + MdxJsxElement(MdxJsxElement), + /// MDX.js ESM. + MdxjsEsm(MdxjsEsm), + // MDX: expression. + MdxExpression(MdxExpression), +} + +impl fmt::Debug for Node { + // Debug the wrapped struct. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Node::Root(x) => write!(f, "{:?}", x), + Node::Element(x) => write!(f, "{:?}", x), + Node::Doctype(x) => write!(f, "{:?}", x), + Node::Comment(x) => write!(f, "{:?}", x), + Node::Text(x) => write!(f, "{:?}", x), + Node::MdxJsxElement(x) => write!(f, "{:?}", x), + Node::MdxExpression(x) => write!(f, "{:?}", x), + Node::MdxjsEsm(x) => write!(f, "{:?}", x), + } + } +} + +fn children_to_string(children: &[Node]) -> String { + children.iter().map(ToString::to_string).collect() +} + +impl ToString for Node { + fn to_string(&self) -> String { + match self { + // Parents. + Node::Root(x) => children_to_string(&x.children), + Node::Element(x) => children_to_string(&x.children), + Node::MdxJsxElement(x) => children_to_string(&x.children), + // Literals. + Node::Comment(x) => x.value.clone(), + Node::Text(x) => x.value.clone(), + Node::MdxExpression(x) => x.value.clone(), + Node::MdxjsEsm(x) => x.value.clone(), + // Voids. + Node::Doctype(_) => "".to_string(), + } + } +} + +impl Node { + #[must_use] + pub fn children(&self) -> Option<&Vec> { + match self { + // Parent. + Node::Root(x) => Some(&x.children), + Node::Element(x) => Some(&x.children), + Node::MdxJsxElement(x) => Some(&x.children), + // Non-parent. + _ => None, + } + } + + pub fn children_mut(&mut self) -> Option<&mut Vec> { + match self { + // Parent. + Node::Root(x) => Some(&mut x.children), + Node::Element(x) => Some(&mut x.children), + Node::MdxJsxElement(x) => Some(&mut x.children), + // Non-parent. + _ => None, + } + } + + pub fn position(&self) -> Option<&Position> { + match self { + Node::Root(x) => x.position.as_ref(), + Node::Element(x) => x.position.as_ref(), + Node::Doctype(x) => x.position.as_ref(), + Node::Comment(x) => x.position.as_ref(), + Node::Text(x) => x.position.as_ref(), + Node::MdxJsxElement(x) => x.position.as_ref(), + Node::MdxExpression(x) => x.position.as_ref(), + Node::MdxjsEsm(x) => x.position.as_ref(), + } + } + + pub fn position_mut(&mut self) -> Option<&mut Position> { + match self { + Node::Root(x) => x.position.as_mut(), + Node::Element(x) => x.position.as_mut(), + Node::Doctype(x) => x.position.as_mut(), + Node::Comment(x) => x.position.as_mut(), + Node::Text(x) => x.position.as_mut(), + Node::MdxJsxElement(x) => x.position.as_mut(), + Node::MdxExpression(x) => x.position.as_mut(), + Node::MdxjsEsm(x) => x.position.as_mut(), + } + } + + pub fn position_set(&mut self, position: Option) { + match self { + Node::Root(x) => x.position = position, + Node::Element(x) => x.position = position, + Node::Doctype(x) => x.position = position, + Node::Comment(x) => x.position = position, + Node::Text(x) => x.position = position, + Node::MdxJsxElement(x) => x.position = position, + Node::MdxExpression(x) => x.position = position, + Node::MdxjsEsm(x) => x.position = position, + } + } +} + +/// Document. +/// +/// ```html +/// > | a +/// ^ +/// ``` +#[derive(Clone, Debug, PartialEq)] +pub struct Root { + // Parent. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// Document type. +/// +/// ```html +/// > | +/// ^^^^^^^^^^^^^^^ +/// ``` +// To do: clone. +#[derive(Clone, Debug, PartialEq)] +pub struct Element { + pub tag_name: String, + pub properties: Vec<(String, PropertyValue)>, + // Parent. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum PropertyItem { + Number(f32), + String(String), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum PropertyValue { + Number(f32), + Boolean(bool), + String(String), + CommaSeparated(Vec), + SpaceSeparated(Vec), +} + +/// Document type. +/// +/// ```html +/// > | +/// ^^^^^^^^^^^^^^^ +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Doctype { + // Void. + /// Positional info. + pub position: Option, +} + +/// Comment. +/// +/// ```html +/// > | +/// ^^^^^^^^^^ +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Comment { + // Text. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// Text. +/// +/// ```html +/// > | a +/// ^ +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Text { + // Text. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// MDX: JSX element. +/// +/// ```markdown +/// > | +/// ^^^^^ +/// ``` +#[derive(Clone, Debug, PartialEq)] +pub struct MdxJsxElement { + // Parent. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, + // JSX element. + /// Name. + /// + /// Fragments have no name. + pub name: Option, + /// Attributes. + pub attributes: Vec, +} + +/// MDX: expression. +/// +/// ```markdown +/// > | {a} +/// ^^^ +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct MdxExpression { + // Literal. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// MDX: ESM. +/// +/// ```markdown +/// > | import a from 'b' +/// ^^^^^^^^^^^^^^^^^ +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct MdxjsEsm { + // Literal. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} diff --git a/tests/test_utils/mod.rs b/tests/test_utils/mod.rs index 10b9643..111118f 100644 --- a/tests/test_utils/mod.rs +++ b/tests/test_utils/mod.rs @@ -1,245 +1,3 @@ -extern crate micromark; -extern crate swc_common; -extern crate swc_ecma_ast; -extern crate swc_ecma_parser; -use micromark::{MdxExpressionKind, MdxSignal}; -use swc_common::{source_map::Pos, BytePos, FileName, SourceFile, Spanned}; -use swc_ecma_ast::{EsVersion, Expr, Module}; -use swc_ecma_parser::{ - error::Error as SwcError, parse_file_as_expr, parse_file_as_module, EsConfig, Syntax, -}; - -/// Parse ESM in MDX with SWC. -pub fn parse_esm(value: &str) -> MdxSignal { - let (file, syntax, version) = create_config(value.to_string()); - let mut errors = vec![]; - let result = parse_file_as_module(&file, syntax, version, None, &mut errors); - - match result { - Err(error) => swc_error_to_signal(&error, value.len(), 0, "esm"), - Ok(tree) => { - if errors.is_empty() { - check_esm_ast(tree) - } else { - if errors.len() > 1 { - println!("parse_esm: todo: multiple errors? {:?}", errors); - } - swc_error_to_signal(&errors[0], value.len(), 0, "esm") - } - } - } -} - -/// Parse expressions in MDX with SWC. -pub fn parse_expression(value: &str, kind: MdxExpressionKind) -> MdxSignal { - // Empty expressions are OK. - if matches!(kind, MdxExpressionKind::Expression) - && matches!(whitespace_and_comments(0, value), MdxSignal::Ok) - { - return MdxSignal::Ok; - } - - // For attribute expression, a spread is needed, for which we have to prefix - // and suffix the input. - // See `check_expression_ast` for how the AST is verified. - let (prefix, suffix) = if matches!(kind, MdxExpressionKind::AttributeExpression) { - ("({", "})") - } else { - ("", "") - }; - - let (file, syntax, version) = create_config(format!("{}{}{}", prefix, value, suffix)); - let mut errors = vec![]; - let result = parse_file_as_expr(&file, syntax, version, None, &mut errors); - - match result { - Err(error) => swc_error_to_signal(&error, value.len(), prefix.len(), "expression"), - Ok(tree) => { - if errors.is_empty() { - let place = fix_swc_position(tree.span().hi.to_usize(), prefix.len()); - let result = check_expression_ast(tree, kind); - if matches!(result, MdxSignal::Ok) { - whitespace_and_comments(place, value) - } else { - result - } - } else { - if errors.len() > 1 { - unreachable!("parse_expression: todo: multiple errors? {:?}", errors); - } - swc_error_to_signal(&errors[0], value.len(), prefix.len(), "expression") - } - } - } -} - -/// Check that the resulting AST of ESM is OK. -/// -/// This checks that only module declarations (import/exports) are used, not -/// statements. -fn check_esm_ast(tree: Module) -> MdxSignal { - let mut index = 0; - while index < tree.body.len() { - let node = &tree.body[index]; - - if !node.is_module_decl() { - let place = fix_swc_position(node.span().hi.to_usize(), 0); - return MdxSignal::Error( - "Unexpected statement in code: only import/exports are supported".to_string(), - place, - ); - } - - index += 1; - } - - MdxSignal::Ok -} - -/// Check that the resulting AST of an expressions is OK. -/// -/// This checks that attribute expressions are the expected spread. -fn check_expression_ast(tree: Box, kind: MdxExpressionKind) -> MdxSignal { - if matches!(kind, MdxExpressionKind::AttributeExpression) - && tree - .unwrap_parens() - .as_object() - .and_then(|object| { - if object.props.len() == 1 { - object.props[0].as_spread() - } else { - None - } - }) - .is_none() - { - MdxSignal::Error( - "Expected a single spread value, such as `...x`".to_string(), - 0, - ) - } else { - MdxSignal::Ok - } -} - -/// Turn an SWC error into an `MdxSignal`. -/// -/// * If the error happens at `value_len`, yields `MdxSignal::Eof` -/// * Else, yields `MdxSignal::Error`. -fn swc_error_to_signal( - error: &SwcError, - value_len: usize, - prefix_len: usize, - name: &str, -) -> MdxSignal { - let message = error.kind().msg().to_string(); - let place = fix_swc_position(error.span().hi.to_usize(), prefix_len); - let message = format!("Could not parse {} with swc: {}", name, message); - - if place >= value_len { - MdxSignal::Eof(message) - } else { - MdxSignal::Error(message, place) - } -} - -/// Move past JavaScript whitespace (well, actually ASCII whitespace) and -/// comments. -/// -/// This is needed because for expressions, we use an API that parses up to -/// a valid expression, but there may be more expressions after it, which we -/// don’t alow. -fn whitespace_and_comments(mut index: usize, value: &str) -> MdxSignal { - let bytes = value.as_bytes(); - let len = bytes.len(); - let mut in_multiline = false; - let mut in_line = false; - - while index < len { - // In a multiline comment: `/* a */`. - if in_multiline { - if index + 1 < len && bytes[index] == b'*' && bytes[index + 1] == b'/' { - index += 1; - in_multiline = false; - } - } - // In a line comment: `// a`. - else if in_line { - if index + 1 < len && bytes[index] == b'\r' && bytes[index + 1] == b'\n' { - index += 1; - in_line = false; - } else if bytes[index] == b'\r' || bytes[index] == b'\n' { - in_line = false; - } - } - // Not in a comment, opening a multiline comment: `/* a */`. - else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'*' { - index += 1; - in_multiline = true; - } - // Not in a comment, opening a line comment: `// a`. - else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'/' { - index += 1; - in_line = true; - } - // Outside comment, whitespace. - else if bytes[index].is_ascii_whitespace() { - // Fine! - } - // Outside comment, not whitespace. - else { - return MdxSignal::Error( - "Could not parse expression with swc: Unexpected content after expression" - .to_string(), - index, - ); - } - - index += 1; - } - - if in_multiline { - MdxSignal::Error( - "Could not parse expression with swc: Unexpected unclosed multiline comment, expected closing: `*/`".to_string(), - index, - ) - } else if in_line { - // EOF instead of EOL is specifically not allowed, because that would - // mean the closing brace is on the commented-out line - MdxSignal::Error( - "Could not parse expression with swc: Unexpected unclosed line comment, expected line ending: `\\n`".to_string(), - index, - ) - } else { - MdxSignal::Ok - } -} - -/// Create configuration for SWC, shared between ESM and expressions. -/// -/// This enables modern JavaScript (ES2022) + JSX. -fn create_config(source: String) -> (SourceFile, Syntax, EsVersion) { - ( - // File. - SourceFile::new( - FileName::Anon, - false, - FileName::Anon, - source, - BytePos::from_usize(1), - ), - // Syntax. - Syntax::Es(EsConfig { - jsx: true, - ..EsConfig::default() - }), - // Version. - EsVersion::Es2022, - ) -} - -/// Turn an SWC byte position from a resulting AST to an offset in the original -/// input string. -fn fix_swc_position(index: usize, prefix_len: usize) -> usize { - index - 1 - prefix_len -} +pub mod hast; +pub mod swc; +pub mod to_hast; diff --git a/tests/test_utils/swc.rs b/tests/test_utils/swc.rs new file mode 100644 index 0000000..f455674 --- /dev/null +++ b/tests/test_utils/swc.rs @@ -0,0 +1,247 @@ +extern crate micromark; +extern crate swc_common; +extern crate swc_ecma_ast; +extern crate swc_ecma_parser; +use micromark::{MdxExpressionKind, MdxSignal}; +use swc_common::{source_map::Pos, BytePos, FileName, SourceFile, Spanned}; +use swc_ecma_ast::{EsVersion, Expr, Module}; +use swc_ecma_parser::{ + error::Error as SwcError, parse_file_as_expr, parse_file_as_module, EsConfig, Syntax, +}; + +/// Parse ESM in MDX with SWC. +#[allow(dead_code)] +pub fn parse_esm(value: &str) -> MdxSignal { + let (file, syntax, version) = create_config(value.to_string()); + let mut errors = vec![]; + let result = parse_file_as_module(&file, syntax, version, None, &mut errors); + + match result { + Err(error) => swc_error_to_signal(&error, value.len(), 0, "esm"), + Ok(tree) => { + if errors.is_empty() { + check_esm_ast(&tree) + } else { + if errors.len() > 1 { + println!("parse_esm: todo: multiple errors? {:?}", errors); + } + swc_error_to_signal(&errors[0], value.len(), 0, "esm") + } + } + } +} + +/// Parse expressions in MDX with SWC. +#[allow(dead_code)] +pub fn parse_expression(value: &str, kind: &MdxExpressionKind) -> MdxSignal { + // Empty expressions are OK. + if matches!(kind, MdxExpressionKind::Expression) + && matches!(whitespace_and_comments(0, value), MdxSignal::Ok) + { + return MdxSignal::Ok; + } + + // For attribute expression, a spread is needed, for which we have to prefix + // and suffix the input. + // See `check_expression_ast` for how the AST is verified. + let (prefix, suffix) = if matches!(kind, MdxExpressionKind::AttributeExpression) { + ("({", "})") + } else { + ("", "") + }; + + let (file, syntax, version) = create_config(format!("{}{}{}", prefix, value, suffix)); + let mut errors = vec![]; + let result = parse_file_as_expr(&file, syntax, version, None, &mut errors); + + match result { + Err(error) => swc_error_to_signal(&error, value.len(), prefix.len(), "expression"), + Ok(tree) => { + if errors.is_empty() { + let place = fix_swc_position(tree.span().hi.to_usize(), prefix.len()); + let result = check_expression_ast(&tree, kind); + if matches!(result, MdxSignal::Ok) { + whitespace_and_comments(place, value) + } else { + result + } + } else { + if errors.len() > 1 { + unreachable!("parse_expression: todo: multiple errors? {:?}", errors); + } + swc_error_to_signal(&errors[0], value.len(), prefix.len(), "expression") + } + } + } +} + +/// Check that the resulting AST of ESM is OK. +/// +/// This checks that only module declarations (import/exports) are used, not +/// statements. +fn check_esm_ast(tree: &Module) -> MdxSignal { + let mut index = 0; + while index < tree.body.len() { + let node = &tree.body[index]; + + if !node.is_module_decl() { + let place = fix_swc_position(node.span().hi.to_usize(), 0); + return MdxSignal::Error( + "Unexpected statement in code: only import/exports are supported".to_string(), + place, + ); + } + + index += 1; + } + + MdxSignal::Ok +} + +/// Check that the resulting AST of an expressions is OK. +/// +/// This checks that attribute expressions are the expected spread. +fn check_expression_ast(tree: &Expr, kind: &MdxExpressionKind) -> MdxSignal { + if matches!(kind, MdxExpressionKind::AttributeExpression) + && tree + .unwrap_parens() + .as_object() + .and_then(|object| { + if object.props.len() == 1 { + object.props[0].as_spread() + } else { + None + } + }) + .is_none() + { + MdxSignal::Error( + "Expected a single spread value, such as `...x`".to_string(), + 0, + ) + } else { + MdxSignal::Ok + } +} + +/// Turn an SWC error into an `MdxSignal`. +/// +/// * If the error happens at `value_len`, yields `MdxSignal::Eof` +/// * Else, yields `MdxSignal::Error`. +fn swc_error_to_signal( + error: &SwcError, + value_len: usize, + prefix_len: usize, + name: &str, +) -> MdxSignal { + let message = error.kind().msg().to_string(); + let place = fix_swc_position(error.span().hi.to_usize(), prefix_len); + let message = format!("Could not parse {} with swc: {}", name, message); + + if place >= value_len { + MdxSignal::Eof(message) + } else { + MdxSignal::Error(message, place) + } +} + +/// Move past JavaScript whitespace (well, actually ASCII whitespace) and +/// comments. +/// +/// This is needed because for expressions, we use an API that parses up to +/// a valid expression, but there may be more expressions after it, which we +/// don’t alow. +fn whitespace_and_comments(mut index: usize, value: &str) -> MdxSignal { + let bytes = value.as_bytes(); + let len = bytes.len(); + let mut in_multiline = false; + let mut in_line = false; + + while index < len { + // In a multiline comment: `/* a */`. + if in_multiline { + if index + 1 < len && bytes[index] == b'*' && bytes[index + 1] == b'/' { + index += 1; + in_multiline = false; + } + } + // In a line comment: `// a`. + else if in_line { + if index + 1 < len && bytes[index] == b'\r' && bytes[index + 1] == b'\n' { + index += 1; + in_line = false; + } else if bytes[index] == b'\r' || bytes[index] == b'\n' { + in_line = false; + } + } + // Not in a comment, opening a multiline comment: `/* a */`. + else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'*' { + index += 1; + in_multiline = true; + } + // Not in a comment, opening a line comment: `// a`. + else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'/' { + index += 1; + in_line = true; + } + // Outside comment, whitespace. + else if bytes[index].is_ascii_whitespace() { + // Fine! + } + // Outside comment, not whitespace. + else { + return MdxSignal::Error( + "Could not parse expression with swc: Unexpected content after expression" + .to_string(), + index, + ); + } + + index += 1; + } + + if in_multiline { + MdxSignal::Error( + "Could not parse expression with swc: Unexpected unclosed multiline comment, expected closing: `*/`".to_string(), + index, + ) + } else if in_line { + // EOF instead of EOL is specifically not allowed, because that would + // mean the closing brace is on the commented-out line + MdxSignal::Error( + "Could not parse expression with swc: Unexpected unclosed line comment, expected line ending: `\\n`".to_string(), + index, + ) + } else { + MdxSignal::Ok + } +} + +/// Create configuration for SWC, shared between ESM and expressions. +/// +/// This enables modern JavaScript (ES2022) + JSX. +fn create_config(source: String) -> (SourceFile, Syntax, EsVersion) { + ( + // File. + SourceFile::new( + FileName::Anon, + false, + FileName::Anon, + source, + BytePos::from_usize(1), + ), + // Syntax. + Syntax::Es(EsConfig { + jsx: true, + ..EsConfig::default() + }), + // Version. + EsVersion::Es2022, + ) +} + +/// Turn an SWC byte position from a resulting AST to an offset in the original +/// input string. +fn fix_swc_position(index: usize, prefix_len: usize) -> usize { + index - 1 - prefix_len +} diff --git a/tests/test_utils/to_hast.rs b/tests/test_utils/to_hast.rs new file mode 100644 index 0000000..821931c --- /dev/null +++ b/tests/test_utils/to_hast.rs @@ -0,0 +1,1457 @@ +use crate::test_utils::hast; +use micromark::{mdast, sanitize_, unist::Position}; + +// Options? +// - dangerous: raw? No +// - clobberPrefix / footnoteLabel / footnoteLabelTagName / footnoteLabelProperties / footnoteBacklabel? Later + +#[derive(Debug)] +struct State { + definitions: Vec<(String, String, Option)>, + footnote_definitions: Vec<(String, Vec)>, + footnote_calls: Vec<(String, usize)>, +} + +#[derive(Debug)] +enum Result { + Fragment(Vec), + Node(hast::Node), + None, +} + +#[allow(dead_code)] +pub fn to_hast(mdast: &mdast::Node) -> hast::Node { + let mut definitions = vec![]; + + // Collect definitions. + // Calls take info from their definition. + // Calls can come come before definitions. + // Footnote calls can also come before footnote definitions, but those + // calls *do not* take info from their definitions, so we don’t care + // about footnotes here. + visit(mdast, |node| { + if let mdast::Node::Definition(definition) = node { + definitions.push(( + definition.identifier.clone(), + definition.url.clone(), + definition.title.clone(), + )); + } + }); + + // - footnoteById: Record + // - footnoteOrder: Vec + // - footnoteCounts: Record + + let (result, mut state) = one( + mdast, + None, + State { + definitions, + footnote_definitions: vec![], + footnote_calls: vec![], + }, + ); + + if state.footnote_calls.is_empty() { + if let Result::Node(node) = result { + return node; + } + } + + // We either have to generate a footer, or we don’t have a single node. + // So we need a root. + let mut root = hast::Root { + children: vec![], + position: None, + }; + + match result { + Result::Fragment(children) => root.children = children, + Result::Node(node) => { + if let hast::Node::Root(existing) = node { + root = existing; + } else { + root.children.push(node); + } + } + Result::None => {} + } + + if !state.footnote_calls.is_empty() { + let mut items = vec![]; + + let mut index = 0; + while index < state.footnote_calls.len() { + let (id, count) = &state.footnote_calls[index]; + let safe_id = sanitize_(&id.to_lowercase()); + + // Find definition: we’ll always find it. + let mut definition_index = 0; + while definition_index < state.footnote_definitions.len() { + if &state.footnote_definitions[definition_index].0 == id { + break; + } + definition_index += 1; + } + debug_assert_ne!( + definition_index, + state.footnote_definitions.len(), + "expected definition" + ); + + // We’ll find each used definition once, so we can split off to take the content. + let mut content = state.footnote_definitions[definition_index].1.split_off(0); + + let mut reference_index = 0; + let mut backreferences = vec![]; + while reference_index < *count { + let mut backref_children = vec![hast::Node::Text(hast::Text { + value: "↩".into(), + position: None, + })]; + + if reference_index != 0 { + backreferences.push(hast::Node::Text(hast::Text { + value: " ".into(), + position: None, + })); + + backref_children.push(hast::Node::Element(hast::Element { + tag_name: "sup".into(), + properties: vec![], + children: vec![hast::Node::Text(hast::Text { + value: (reference_index + 1).to_string(), + position: None, + })], + position: None, + })); + } + + backreferences.push(hast::Node::Element(hast::Element { + tag_name: "a".into(), + properties: vec![ + ( + "href".into(), + hast::PropertyValue::String(format!( + "#fnref-{}{}", + safe_id, + if reference_index == 0 { + "".into() + } else { + format!("-{}", &(reference_index + 1).to_string()) + } + )), + ), + ( + "dataFootnoteBackref".into(), + hast::PropertyValue::Boolean(true), + ), + ( + "ariaLabel".into(), + hast::PropertyValue::String("Back to content".into()), + ), + ( + "className".into(), + hast::PropertyValue::SpaceSeparated(vec![hast::PropertyItem::String( + "data-footnote-backref".into(), + )]), + ), + ], + children: backref_children, + position: None, + })); + + reference_index += 1; + } + + let mut backreference_opt = Some(backreferences); + + if let Some(hast::Node::Element(tail_element)) = content.last_mut() { + if tail_element.tag_name == "p" { + if let Some(hast::Node::Text(text)) = tail_element.children.last_mut() { + text.value.push(' '); + } else { + tail_element.children.push(hast::Node::Text(hast::Text { + value: " ".into(), + position: None, + })); + } + + tail_element + .children + .append(&mut backreference_opt.take().unwrap()); + } + } + + // No paragraph, just push them. + if let Some(mut backreference) = backreference_opt { + content.append(&mut backreference); + } + + items.push(hast::Node::Element(hast::Element { + tag_name: "li".into(), + // To do: support clobber prefix. + properties: vec![( + "id".into(), + hast::PropertyValue::String(format!("#fn-{}", safe_id)), + )], + children: wrap(content, true), + position: None, + })); + index += 1; + } + + root.children.push(hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + })); + root.children.push(hast::Node::Element(hast::Element { + tag_name: "section".into(), + properties: vec![ + ("dataFootnotes".into(), hast::PropertyValue::Boolean(true)), + ( + "className".into(), + hast::PropertyValue::SpaceSeparated(vec![hast::PropertyItem::String( + "footnotes".into(), + )]), + ), + ], + children: vec![ + hast::Node::Element(hast::Element { + tag_name: "h2".into(), + properties: vec![ + ( + "id".into(), + hast::PropertyValue::String("footnote-label".into()), + ), + ( + "className".into(), + hast::PropertyValue::SpaceSeparated(vec![hast::PropertyItem::String( + "sr-only".into(), + )]), + ), + ], + children: vec![hast::Node::Text(hast::Text { + value: "Footnotes".into(), + position: None, + })], + position: None, + }), + hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + }), + hast::Node::Element(hast::Element { + tag_name: "ol".into(), + properties: vec![], + children: wrap(items, true), + position: None, + }), + hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + }), + ], + position: None, + })); + root.children.push(hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + })); + } + + hast::Node::Root(root) +} + +fn one(node: &mdast::Node, parent: Option<&mdast::Node>, state: State) -> (Result, State) { + match node { + mdast::Node::BlockQuote(_) => transform_block_quote(node, state), + mdast::Node::Break(_) => transform_break(node, state), + mdast::Node::Code(_) => transform_code(node, state), + mdast::Node::Delete(_) => transform_delete(node, state), + mdast::Node::Emphasis(_) => transform_emphasis(node, state), + mdast::Node::FootnoteDefinition(_) => transform_footnote_definition(node, state), + mdast::Node::FootnoteReference(_) => transform_footnote_reference(node, state), + mdast::Node::Heading(_) => transform_heading(node, state), + mdast::Node::Image(_) => transform_image(node, state), + mdast::Node::ImageReference(_) => transform_image_reference(node, state), + mdast::Node::InlineCode(_) => transform_inline_code(node, state), + mdast::Node::InlineMath(_) => transform_inline_math(node, state), + mdast::Node::Link(_) => transform_link(node, state), + mdast::Node::LinkReference(_) => transform_link_reference(node, state), + mdast::Node::ListItem(_) => transform_list_item(node, parent, state), + mdast::Node::List(_) => transform_list(node, state), + mdast::Node::Math(_) => transform_math(node, state), + mdast::Node::MdxFlowExpression(_) | mdast::Node::MdxTextExpression(_) => { + transform_mdx_expression(node, state) + } + mdast::Node::MdxJsxFlowElement(_) | mdast::Node::MdxJsxTextElement(_) => { + transform_mdx_jsx_element(node, state) + } + mdast::Node::MdxjsEsm(_) => transform_mdxjs_esm(node, state), + mdast::Node::Paragraph(_) => transform_paragraph(node, state), + mdast::Node::Root(_) => transform_root(node, state), + mdast::Node::Strong(_) => transform_strong(node, state), + // Note: this is only called here if there is a single cell passed, not when one is found in a table. + mdast::Node::TableCell(_) => { + transform_table_cell(node, false, mdast::AlignKind::None, state) + } + // Note: this is only called here if there is a single row passed, not when one is found in a table. + mdast::Node::TableRow(_) => transform_table_row(node, false, None, state), + mdast::Node::Table(_) => transform_table(node, state), + mdast::Node::Text(_) => transform_text(node, state), + mdast::Node::ThematicBreak(_) => transform_thematic_break(node, state), + // Ignore. + // Idea: support `Raw` nodes for HTML, optionally? + mdast::Node::Definition(_) + | mdast::Node::Html(_) + | mdast::Node::Yaml(_) + | mdast::Node::Toml(_) => (Result::None, state), + } +} + +/// [`BlockQuote`][mdast::BlockQuote]. +fn transform_block_quote(node: &mdast::Node, state: State) -> (Result, State) { + let (children, state) = all(node, state); + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "blockquote".into(), + properties: vec![], + children: wrap(children, true), + position: None, + }), + )), + state, + ) +} + +/// [`Break`][mdast::Break]. +fn transform_break(node: &mdast::Node, state: State) -> (Result, State) { + ( + Result::Fragment(vec![ + augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "br".into(), + properties: vec![], + children: vec![], + position: None, + }), + ), + hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + }), + ]), + state, + ) +} + +/// [`Code`][mdast::Code]. +fn transform_code(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::Code(code) = node { + let mut value = code.value.clone(); + value.push('\n'); + let mut properties = vec![]; + + if let Some(lang) = code.lang.as_ref() { + let mut value = "language-".to_string(); + value.push_str(lang); + let value = hast::PropertyItem::String(value); + properties.push(( + "className".into(), + hast::PropertyValue::SpaceSeparated(vec![value]), + )); + } + + // To do: option to persist `meta`? + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "pre".into(), + properties: vec![], + children: vec![augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "code".into(), + properties, + children: vec![hast::Node::Text(hast::Text { + value, + position: None, + })], + position: None, + }), + )], + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `Code`") + } +} + +/// [`Delete`][mdast::Delete]. +fn transform_delete(node: &mdast::Node, state: State) -> (Result, State) { + let (children, state) = all(node, state); + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "del".into(), + properties: vec![], + children, + position: None, + }), + )), + state, + ) +} + +/// [`Emphasis`][mdast::Emphasis]. +fn transform_emphasis(node: &mdast::Node, state: State) -> (Result, State) { + let (children, state) = all(node, state); + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "em".into(), + properties: vec![], + children, + position: None, + }), + )), + state, + ) +} + +/// [`FootnoteDefinition`][mdast::FootnoteDefinition]. +fn transform_footnote_definition(node: &mdast::Node, mut state: State) -> (Result, State) { + if let mdast::Node::FootnoteDefinition(definition) = node { + let result = all(node, state); + let children = result.0; + state = result.1; + // Set aside. + state + .footnote_definitions + .push((definition.identifier.clone(), children)); + (Result::None, state) + } else { + unreachable!("expected `FootnoteDefinition`") + } +} + +/// [`FootnoteReference`][mdast::FootnoteReference]. +fn transform_footnote_reference(node: &mdast::Node, mut state: State) -> (Result, State) { + if let mdast::Node::FootnoteReference(reference) = node { + let safe_id = sanitize_(&reference.identifier.to_lowercase()); + let mut call_index = 0; + + // See if this has been called before. + while call_index < state.footnote_calls.len() { + if state.footnote_calls[call_index].0 == reference.identifier { + break; + } + call_index += 1; + } + + // New. + if call_index == state.footnote_calls.len() { + state.footnote_calls.push((reference.identifier.clone(), 0)); + } + + // Increment. + state.footnote_calls[call_index].1 += 1; + + let reuse_counter = state.footnote_calls[call_index].1; + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "sup".into(), + properties: vec![], + children: vec![hast::Node::Element(hast::Element { + tag_name: "a".into(), + // To do: support clobber prefix. + properties: vec![ + ( + "href".into(), + hast::PropertyValue::String(format!("#fn-{}", safe_id)), + ), + ( + "id".into(), + hast::PropertyValue::String(format!( + "fnref-{}{}", + safe_id, + if reuse_counter > 1 { + format!("-{}", reuse_counter) + } else { + "".into() + } + )), + ), + ("dataFootnoteRef".into(), hast::PropertyValue::Boolean(true)), + ( + "ariaDescribedBy".into(), + hast::PropertyValue::String("footnote-label".into()), + ), + ], + children: vec![hast::Node::Text(hast::Text { + value: (call_index + 1).to_string(), + position: None, + })], + position: None, + })], + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `FootnoteReference`") + } +} + +/// [`Heading`][mdast::Heading]. +fn transform_heading(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::Heading(heading) = node { + let (children, state) = all(node, state); + let tag_name = format!("h{}", heading.depth); + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name, + properties: vec![], + children, + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `Heading`") + } +} + +/// [`Image`][mdast::Image]. +fn transform_image(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::Image(image) = node { + let mut properties = vec![]; + + properties.push(( + "src".into(), + hast::PropertyValue::String(sanitize_(&image.url)), + )); + + properties.push(("alt".into(), hast::PropertyValue::String(image.alt.clone()))); + + if let Some(value) = image.title.as_ref() { + properties.push(("title".into(), hast::PropertyValue::String(value.into()))); + } + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "img".into(), + properties, + children: vec![], + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `Image`") + } +} + +/// [`ImageReference`][mdast::ImageReference]. +fn transform_image_reference(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::ImageReference(reference) = node { + let mut properties = vec![]; + + let definition = state + .definitions + .iter() + .find(|d| d.0 == reference.identifier); + + // To do: revert when undefined? + let (_, url, title) = + definition.expect("expected reference to have a corresponding definition"); + + properties.push(("src".into(), hast::PropertyValue::String(sanitize_(url)))); + + properties.push(( + "alt".into(), + hast::PropertyValue::String(reference.alt.clone()), + )); + + if let Some(value) = title { + properties.push(("title".into(), hast::PropertyValue::String(value.into()))); + } + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "img".into(), + properties, + children: vec![], + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `ImageReference`") + } +} + +/// [`InlineCode`][mdast::InlineCode]. +fn transform_inline_code(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::InlineCode(code) = node { + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "code".into(), + properties: vec![], + children: vec![hast::Node::Text(hast::Text { + value: replace_eols_with_spaces(&code.value), + position: None, + })], + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `InlineCode`") + } +} + +/// [`InlineMath`][mdast::InlineMath]. +fn transform_inline_math(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::InlineMath(math) = node { + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "code".into(), + properties: vec![( + "className".into(), + hast::PropertyValue::SpaceSeparated(vec![ + hast::PropertyItem::String("language-math".into()), + hast::PropertyItem::String("math-inline".into()), + ]), + )], + children: vec![hast::Node::Text(hast::Text { + value: replace_eols_with_spaces(&math.value), + position: None, + })], + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `InlineMath`") + } +} + +/// [`Link`][mdast::Link]. +fn transform_link(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::Link(link) = node { + let mut properties = vec![]; + + properties.push(( + "href".into(), + hast::PropertyValue::String(sanitize_(&link.url)), + )); + + if let Some(value) = link.title.as_ref() { + properties.push(("title".into(), hast::PropertyValue::String(value.into()))); + } + + let (children, state) = all(node, state); + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "a".into(), + properties, + children, + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `Link`") + } +} + +/// [`LinkReference`][mdast::LinkReference]. +fn transform_link_reference(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::LinkReference(reference) = node { + let mut properties = vec![]; + + let definition = state + .definitions + .iter() + .find(|d| d.0 == reference.identifier); + + // To do: revert when undefined? + let (_, url, title) = + definition.expect("expected reference to have a corresponding definition"); + + properties.push(("href".into(), hast::PropertyValue::String(sanitize_(url)))); + + if let Some(value) = title { + properties.push(("title".into(), hast::PropertyValue::String(value.into()))); + } + + let (children, state) = all(node, state); + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "a".into(), + properties, + children, + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `LinkReference`") + } +} + +/// [`ListItem`][mdast::ListItem]. +fn transform_list_item( + node: &mdast::Node, + parent: Option<&mdast::Node>, + state: State, +) -> (Result, State) { + if let mdast::Node::ListItem(item) = node { + let (mut children, state) = all(node, state); + let mut loose = list_item_loose(node); + + if let Some(parent) = parent { + if matches!(parent, mdast::Node::List(_)) { + loose = list_loose(parent); + } + }; + + let mut properties = vec![]; + + // Inject a checkbox. + if let Some(checked) = item.checked { + // According to github-markdown-css, this class hides bullet. + // See: . + properties.push(( + "className".into(), + hast::PropertyValue::SpaceSeparated(vec![hast::PropertyItem::String( + "task-list-item".into(), + )]), + )); + + let mut input = Some(hast::Node::Element(hast::Element { + tag_name: "input".into(), + properties: vec![ + ( + "type".into(), + hast::PropertyValue::String("checkbox".into()), + ), + ("checked".into(), hast::PropertyValue::Boolean(checked)), + ("disabled".into(), hast::PropertyValue::Boolean(true)), + ], + children: vec![], + position: None, + })); + + if let Some(hast::Node::Element(x)) = children.first_mut() { + if x.tag_name == "p" { + if !x.children.is_empty() { + x.children.insert( + 0, + hast::Node::Text(hast::Text { + value: " ".into(), + position: None, + }), + ); + } + + x.children.insert(0, input.take().unwrap()); + } + } + + // If the input wasn‘t injected yet, inject a paragraph. + if let Some(input) = input { + children.insert( + 0, + hast::Node::Element(hast::Element { + tag_name: "p".into(), + properties: vec![], + children: vec![input], + position: None, + }), + ); + } + } + + children.reverse(); + let mut result = vec![]; + let mut head = true; + let empty = children.is_empty(); + let mut tail_p = false; + + while let Some(child) = children.pop() { + let mut is_p = false; + if let hast::Node::Element(el) = &child { + if el.tag_name == "p" { + is_p = true; + } + } + + // Add eols before nodes, except if this is a tight, first paragraph. + if loose || !head || !is_p { + result.push(hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + })); + } + + if is_p && !loose { + // Unwrap the paragraph. + if let hast::Node::Element(mut el) = child { + result.append(&mut el.children); + } + } else { + result.push(child); + } + + head = false; + tail_p = is_p; + } + + // Add eol after last node, except if it is tight or a paragraph. + if !empty && (loose || !tail_p) { + result.push(hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + })); + } + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "li".into(), + properties, + children: result, + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `ListItem`") + } +} + +/// [`List`][mdast::List]. +fn transform_list(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::List(list) = node { + let mut contains_task_list = false; + let mut index = 0; + + while index < list.children.len() { + if let mdast::Node::ListItem(item) = &list.children[index] { + if item.checked.is_some() { + contains_task_list = true; + } + } + + index += 1; + } + + let (children, state) = all(node, state); + let mut properties = vec![]; + let tag_name = if list.ordered { + "ol".into() + } else { + "ul".into() + }; + + // Add start. + if let Some(start) = list.start { + if list.ordered && start != 1 { + properties.push(("start".into(), hast::PropertyValue::Number(start.into()))); + } + } + + // Like GitHub, add a class for custom styling. + if contains_task_list { + properties.push(( + "className".into(), + hast::PropertyValue::SpaceSeparated(vec![hast::PropertyItem::String( + "contains-task-list".into(), + )]), + )); + } + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name, + properties, + children: wrap(children, true), + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `List`") + } +} + +/// [`Math`][mdast::Math]. +fn transform_math(node: &mdast::Node, state: State) -> (Result, State) { + if let mdast::Node::Math(math) = node { + let mut value = math.value.clone(); + value.push('\n'); + + // To do: option to persist `meta`? + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "pre".into(), + properties: vec![], + children: vec![augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "code".into(), + properties: vec![( + "className".into(), + hast::PropertyValue::SpaceSeparated(vec![ + hast::PropertyItem::String("language-math".into()), + hast::PropertyItem::String("math-display".into()), + ]), + )], + children: vec![hast::Node::Text(hast::Text { + value, + position: None, + })], + position: None, + }), + )], + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `Math`") + } +} + +/// [`MdxFlowExpression`][mdast::MdxFlowExpression],[`MdxTextExpression`][mdast::MdxTextExpression]. +fn transform_mdx_expression(node: &mdast::Node, state: State) -> (Result, State) { + ( + Result::Node(augment_node( + node, + hast::Node::MdxExpression(hast::MdxExpression { + value: node.to_string(), + position: None, + }), + )), + state, + ) +} + +/// [`MdxJsxFlowElement`][mdast::MdxJsxFlowElement],[`MdxJsxTextElement`][mdast::MdxJsxTextElement]. +fn transform_mdx_jsx_element(node: &mdast::Node, state: State) -> (Result, State) { + let (children, state) = all(node, state); + + let (name, attributes) = match node { + mdast::Node::MdxJsxFlowElement(n) => (&n.name, &n.attributes), + mdast::Node::MdxJsxTextElement(n) => (&n.name, &n.attributes), + _ => unreachable!("expected mdx jsx element"), + }; + + ( + Result::Node(augment_node( + node, + hast::Node::MdxJsxElement(hast::MdxJsxElement { + name: name.clone(), + attributes: attributes.clone(), + children, + position: None, + }), + )), + state, + ) +} + +/// [`MdxjsEsm`][mdast::MdxjsEsm]. +fn transform_mdxjs_esm(node: &mdast::Node, state: State) -> (Result, State) { + ( + Result::Node(augment_node( + node, + hast::Node::MdxjsEsm(hast::MdxjsEsm { + value: node.to_string(), + position: None, + }), + )), + state, + ) +} + +/// [`Paragraph`][mdast::Paragraph]. +fn transform_paragraph(node: &mdast::Node, state: State) -> (Result, State) { + let (children, state) = all(node, state); + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "p".into(), + properties: vec![], + children, + position: None, + }), + )), + state, + ) +} + +/// [`Root`][mdast::Root]. +fn transform_root(node: &mdast::Node, state: State) -> (Result, State) { + let (children, state) = all(node, state); + ( + Result::Node(augment_node( + node, + hast::Node::Root(hast::Root { + children: wrap(children, false), + position: None, + }), + )), + state, + ) +} + +/// [`Strong`][mdast::Strong]. +fn transform_strong(node: &mdast::Node, state: State) -> (Result, State) { + let (children, state) = all(node, state); + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "strong".into(), + properties: vec![], + children, + position: None, + }), + )), + state, + ) +} + +/// [`TableCell`][mdast::TableCell]. +fn transform_table_cell( + node: &mdast::Node, + head: bool, + align: mdast::AlignKind, + state: State, +) -> (Result, State) { + let (children, state) = all(node, state); + // To do: option to generate a `style` instead? + let align_value = match align { + mdast::AlignKind::None => None, + mdast::AlignKind::Left => Some("left"), + mdast::AlignKind::Right => Some("right"), + mdast::AlignKind::Center => Some("center"), + }; + + let mut properties = vec![]; + + if let Some(value) = align_value { + properties.push(("align".into(), hast::PropertyValue::String(value.into()))); + } + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: if head { "th".into() } else { "td".into() }, + properties, + children, + position: None, + }), + )), + state, + ) +} + +/// [`TableRow`][mdast::TableRow]. +fn transform_table_row( + node: &mdast::Node, + head: bool, + align: Option<&[mdast::AlignKind]>, + mut state: State, +) -> (Result, State) { + if let mdast::Node::TableRow(row) = node { + let mut children = vec![]; + let mut index = 0; + #[allow(clippy::redundant_closure_for_method_calls)] + let len = align.map_or(row.children.len(), |d| d.len()); + let empty_cell = mdast::Node::TableCell(mdast::TableCell { + children: vec![], + position: None, + }); + + while index < len { + let align_value = align + .and_then(|d| d.get(index)) + .unwrap_or(&mdast::AlignKind::None); + + let child = row.children.get(index).unwrap_or(&empty_cell); + let tuple = transform_table_cell(child, head, *align_value, state); + append_result(&mut children, tuple.0); + state = tuple.1; + index += 1; + } + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "tr".into(), + properties: vec![], + children: wrap(children, true), + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `TableRow`") + } +} + +/// [`Table`][mdast::Table]. +fn transform_table(node: &mdast::Node, mut state: State) -> (Result, State) { + if let mdast::Node::Table(table) = node { + let mut rows = vec![]; + // let body = hast::Element { + // tag_name: "tbody".into(), + // properties: vec![], + // children: vec![], + // position: None, + // }; + let mut index = 0; + + while index < table.children.len() { + let tuple = transform_table_row( + &table.children[index], + index == 0, + Some(&table.align), + state, + ); + append_result(&mut rows, tuple.0); + state = tuple.1; + index += 1; + } + + let body_rows = rows.split_off(1); + let head_row = rows.pop(); + let mut children = vec![]; + + if let Some(row) = head_row { + let position = row.position().cloned(); + children.push(hast::Node::Element(hast::Element { + tag_name: "thead".into(), + properties: vec![], + children: wrap(vec![row], true), + position, + })); + } + + if !body_rows.is_empty() { + let mut position = None; + + if let Some(position_start) = body_rows.first().and_then(hast::Node::position) { + if let Some(position_end) = body_rows.last().and_then(hast::Node::position) { + position = Some(Position { + start: position_start.start.clone(), + end: position_end.end.clone(), + }); + } + } + + children.push(hast::Node::Element(hast::Element { + tag_name: "tbody".into(), + properties: vec![], + children: wrap(body_rows, true), + position, + })); + } + + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "table".into(), + properties: vec![], + children: wrap(children, true), + position: None, + }), + )), + state, + ) + } else { + unreachable!("expected `Table`") + } +} + +/// [`Text`][mdast::Text]. +fn transform_text(node: &mdast::Node, state: State) -> (Result, State) { + ( + Result::Node(augment_node( + node, + hast::Node::Text(hast::Text { + value: node.to_string(), + position: None, + }), + )), + state, + ) +} + +/// [`ThematicBreak`][mdast::ThematicBreak]. +fn transform_thematic_break(node: &mdast::Node, state: State) -> (Result, State) { + ( + Result::Node(augment_node( + node, + hast::Node::Element(hast::Element { + tag_name: "hr".into(), + properties: vec![], + children: vec![], + position: None, + }), + )), + state, + ) +} + +// Transform children of `parent`. +fn all(parent: &mdast::Node, mut state: State) -> (Vec, State) { + let mut result = vec![]; + if let Some(children) = parent.children() { + let mut index = 0; + while index < children.len() { + let child = &children[index]; + let tuple = one(child, Some(parent), state); + append_result(&mut result, tuple.0); + state = tuple.1; + index += 1; + } + } + + (result, state) +} + +/// Wrap `nodes` with line feeds between each entry. +/// Optionally adds line feeds at the start and end. +fn wrap(mut nodes: Vec, loose: bool) -> Vec { + let mut result = vec![]; + let was_empty = nodes.is_empty(); + let mut head = true; + + nodes.reverse(); + + if loose { + result.push(hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + })); + } + + while let Some(item) = nodes.pop() { + // Inject when there’s more: + if !head { + result.push(hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + })); + } + head = false; + result.push(item); + } + + if loose && !was_empty { + result.push(hast::Node::Text(hast::Text { + value: "\n".into(), + position: None, + })); + } + + result +} + +/// Patch a position from the node `left` onto `right`. +fn augment_node(left: &mdast::Node, right: hast::Node) -> hast::Node { + if let Some(position) = left.position() { + augment_position(position, right) + } else { + right + } +} + +/// Patch a position from `left` onto `right`. +fn augment_position(left: &Position, mut right: hast::Node) -> hast::Node { + right.position_set(Some(left.clone())); + right +} + +/// Visit. +fn visit(node: &mdast::Node, visitor: Visitor) +where + Visitor: FnMut(&mdast::Node), +{ + visit_impl(node, visitor); +} + +/// Visit, mutably. +// Probably useful later: +#[allow(dead_code)] +fn visit_mut(node: &mut mdast::Node, visitor: Visitor) +where + Visitor: FnMut(&mut mdast::Node), +{ + visit_mut_impl(node, visitor); +} + +/// Internal implementation to visit. +fn visit_impl(node: &mdast::Node, mut visitor: Visitor) -> Visitor +where + Visitor: FnMut(&mdast::Node), +{ + visitor(node); + + if let Some(children) = node.children() { + let mut index = 0; + while index < children.len() { + let child = &children[index]; + visitor = visit_impl(child, visitor); + index += 1; + } + } + + visitor +} + +/// Internal implementation to visit, mutably. +fn visit_mut_impl(node: &mut mdast::Node, mut visitor: Visitor) -> Visitor +where + Visitor: FnMut(&mut mdast::Node), +{ + visitor(node); + + if let Some(children) = node.children_mut() { + let mut index = 0; + while let Some(child) = children.get_mut(index) { + visitor = visit_mut_impl(child, visitor); + index += 1; + } + } + + visitor +} + +// To do: trim arounds breaks: . +/// Append an (optional, variadic) result. +fn append_result(list: &mut Vec, result: Result) { + match result { + Result::Fragment(mut fragment) => list.append(&mut fragment), + Result::Node(node) => list.push(node), + Result::None => {} + }; +} + +/// Replace line endings (CR, LF, CRLF) with spaces. +/// +/// Used for inline code and inline math. +fn replace_eols_with_spaces(value: &str) -> String { + // It’ll grow a bit small for each CR+LF. + let mut result = String::with_capacity(value.len()); + let bytes = value.as_bytes(); + let mut index = 0; + let mut start = 0; + + while index < bytes.len() { + let byte = bytes[index]; + + if byte == b'\r' || byte == b'\n' { + result.push_str(&value[start..index]); + result.push(' '); + + if index + 1 < bytes.len() && byte == b'\r' && bytes[index + 1] == b'\n' { + index += 1; + } + + start = index + 1; + } + + index += 1; + } + + result.push_str(&value[start..]); + + result +} + +/// Check if a list is loose. +fn list_loose(node: &mdast::Node) -> bool { + if let mdast::Node::List(list) = node { + if list.spread { + return true; + } + + if let Some(children) = node.children() { + let mut index = 0; + while index < children.len() { + if list_item_loose(&children[index]) { + return true; + } + index += 1; + } + } + } + + false +} + +/// Check if a list item is loose. +fn list_item_loose(node: &mdast::Node) -> bool { + if let mdast::Node::ListItem(item) = node { + item.spread + } else { + false + } +} -- cgit