diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-09-28 17:54:39 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-09-28 17:55:44 +0200 |
commit | b33a81e40620b8b3eaeeec9d0e0b34ca5958dead (patch) | |
tree | c91e56db38777b30cdcef591d0f7cd9bd1ac0ee8 /tests/test_utils/mod.rs | |
parent | a0c84c505d733be2e987a333a34244c1befb56cb (diff) | |
download | markdown-rs-b33a81e40620b8b3eaeeec9d0e0b34ca5958dead.tar.gz markdown-rs-b33a81e40620b8b3eaeeec9d0e0b34ca5958dead.tar.bz2 markdown-rs-b33a81e40620b8b3eaeeec9d0e0b34ca5958dead.zip |
Add support for turning mdast to hast
Diffstat (limited to '')
-rw-r--r-- | tests/test_utils/mod.rs | 248 |
1 files changed, 3 insertions, 245 deletions
diff --git a/tests/test_utils/mod.rs b/tests/test_utils/mod.rs index 10b9643..111118f 100644 --- a/tests/test_utils/mod.rs +++ b/tests/test_utils/mod.rs @@ -1,245 +1,3 @@ -extern crate micromark; -extern crate swc_common; -extern crate swc_ecma_ast; -extern crate swc_ecma_parser; -use micromark::{MdxExpressionKind, MdxSignal}; -use swc_common::{source_map::Pos, BytePos, FileName, SourceFile, Spanned}; -use swc_ecma_ast::{EsVersion, Expr, Module}; -use swc_ecma_parser::{ - error::Error as SwcError, parse_file_as_expr, parse_file_as_module, EsConfig, Syntax, -}; - -/// Parse ESM in MDX with SWC. -pub fn parse_esm(value: &str) -> MdxSignal { - let (file, syntax, version) = create_config(value.to_string()); - let mut errors = vec![]; - let result = parse_file_as_module(&file, syntax, version, None, &mut errors); - - match result { - Err(error) => swc_error_to_signal(&error, value.len(), 0, "esm"), - Ok(tree) => { - if errors.is_empty() { - check_esm_ast(tree) - } else { - if errors.len() > 1 { - println!("parse_esm: todo: multiple errors? {:?}", errors); - } - swc_error_to_signal(&errors[0], value.len(), 0, "esm") - } - } - } -} - -/// Parse expressions in MDX with SWC. -pub fn parse_expression(value: &str, kind: MdxExpressionKind) -> MdxSignal { - // Empty expressions are OK. - if matches!(kind, MdxExpressionKind::Expression) - && matches!(whitespace_and_comments(0, value), MdxSignal::Ok) - { - return MdxSignal::Ok; - } - - // For attribute expression, a spread is needed, for which we have to prefix - // and suffix the input. - // See `check_expression_ast` for how the AST is verified. - let (prefix, suffix) = if matches!(kind, MdxExpressionKind::AttributeExpression) { - ("({", "})") - } else { - ("", "") - }; - - let (file, syntax, version) = create_config(format!("{}{}{}", prefix, value, suffix)); - let mut errors = vec![]; - let result = parse_file_as_expr(&file, syntax, version, None, &mut errors); - - match result { - Err(error) => swc_error_to_signal(&error, value.len(), prefix.len(), "expression"), - Ok(tree) => { - if errors.is_empty() { - let place = fix_swc_position(tree.span().hi.to_usize(), prefix.len()); - let result = check_expression_ast(tree, kind); - if matches!(result, MdxSignal::Ok) { - whitespace_and_comments(place, value) - } else { - result - } - } else { - if errors.len() > 1 { - unreachable!("parse_expression: todo: multiple errors? {:?}", errors); - } - swc_error_to_signal(&errors[0], value.len(), prefix.len(), "expression") - } - } - } -} - -/// Check that the resulting AST of ESM is OK. -/// -/// This checks that only module declarations (import/exports) are used, not -/// statements. -fn check_esm_ast(tree: Module) -> MdxSignal { - let mut index = 0; - while index < tree.body.len() { - let node = &tree.body[index]; - - if !node.is_module_decl() { - let place = fix_swc_position(node.span().hi.to_usize(), 0); - return MdxSignal::Error( - "Unexpected statement in code: only import/exports are supported".to_string(), - place, - ); - } - - index += 1; - } - - MdxSignal::Ok -} - -/// Check that the resulting AST of an expressions is OK. -/// -/// This checks that attribute expressions are the expected spread. -fn check_expression_ast(tree: Box<Expr>, kind: MdxExpressionKind) -> MdxSignal { - if matches!(kind, MdxExpressionKind::AttributeExpression) - && tree - .unwrap_parens() - .as_object() - .and_then(|object| { - if object.props.len() == 1 { - object.props[0].as_spread() - } else { - None - } - }) - .is_none() - { - MdxSignal::Error( - "Expected a single spread value, such as `...x`".to_string(), - 0, - ) - } else { - MdxSignal::Ok - } -} - -/// Turn an SWC error into an `MdxSignal`. -/// -/// * If the error happens at `value_len`, yields `MdxSignal::Eof` -/// * Else, yields `MdxSignal::Error`. -fn swc_error_to_signal( - error: &SwcError, - value_len: usize, - prefix_len: usize, - name: &str, -) -> MdxSignal { - let message = error.kind().msg().to_string(); - let place = fix_swc_position(error.span().hi.to_usize(), prefix_len); - let message = format!("Could not parse {} with swc: {}", name, message); - - if place >= value_len { - MdxSignal::Eof(message) - } else { - MdxSignal::Error(message, place) - } -} - -/// Move past JavaScript whitespace (well, actually ASCII whitespace) and -/// comments. -/// -/// This is needed because for expressions, we use an API that parses up to -/// a valid expression, but there may be more expressions after it, which we -/// don’t alow. -fn whitespace_and_comments(mut index: usize, value: &str) -> MdxSignal { - let bytes = value.as_bytes(); - let len = bytes.len(); - let mut in_multiline = false; - let mut in_line = false; - - while index < len { - // In a multiline comment: `/* a */`. - if in_multiline { - if index + 1 < len && bytes[index] == b'*' && bytes[index + 1] == b'/' { - index += 1; - in_multiline = false; - } - } - // In a line comment: `// a`. - else if in_line { - if index + 1 < len && bytes[index] == b'\r' && bytes[index + 1] == b'\n' { - index += 1; - in_line = false; - } else if bytes[index] == b'\r' || bytes[index] == b'\n' { - in_line = false; - } - } - // Not in a comment, opening a multiline comment: `/* a */`. - else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'*' { - index += 1; - in_multiline = true; - } - // Not in a comment, opening a line comment: `// a`. - else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'/' { - index += 1; - in_line = true; - } - // Outside comment, whitespace. - else if bytes[index].is_ascii_whitespace() { - // Fine! - } - // Outside comment, not whitespace. - else { - return MdxSignal::Error( - "Could not parse expression with swc: Unexpected content after expression" - .to_string(), - index, - ); - } - - index += 1; - } - - if in_multiline { - MdxSignal::Error( - "Could not parse expression with swc: Unexpected unclosed multiline comment, expected closing: `*/`".to_string(), - index, - ) - } else if in_line { - // EOF instead of EOL is specifically not allowed, because that would - // mean the closing brace is on the commented-out line - MdxSignal::Error( - "Could not parse expression with swc: Unexpected unclosed line comment, expected line ending: `\\n`".to_string(), - index, - ) - } else { - MdxSignal::Ok - } -} - -/// Create configuration for SWC, shared between ESM and expressions. -/// -/// This enables modern JavaScript (ES2022) + JSX. -fn create_config(source: String) -> (SourceFile, Syntax, EsVersion) { - ( - // File. - SourceFile::new( - FileName::Anon, - false, - FileName::Anon, - source, - BytePos::from_usize(1), - ), - // Syntax. - Syntax::Es(EsConfig { - jsx: true, - ..EsConfig::default() - }), - // Version. - EsVersion::Es2022, - ) -} - -/// Turn an SWC byte position from a resulting AST to an offset in the original -/// input string. -fn fix_swc_position(index: usize, prefix_len: usize) -> usize { - index - 1 - prefix_len -} +pub mod hast; +pub mod swc; +pub mod to_hast; |