diff options
Diffstat (limited to 'build.rs')
-rw-r--r-- | build.rs | 88 |
1 files changed, 88 insertions, 0 deletions
extern crate reqwest;
use regex::Regex;
use std::fs;

/// Build-script entry point: runs the CommonMark test generator, then the
/// Unicode punctuation generator.
#[tokio::main]
async fn main() {
    commonmark().await;
    punctuation().await;
}

/// Build the source of a CommonMark test module from the CommonMark spec.
///
/// The spec text is read from `commonmark-data.txt` when that file can be
/// read; otherwise it is downloaded and then cached under that name.
/// Everything from `<!-- END TESTS -->` onwards is discarded and every `→` is
/// turned into a tab.
/// Each fenced example becomes one `assert_eq!` labelled with the most recent
/// heading and a running example index.
///
/// The assembled module source is currently *not* written to disk (see the
/// to-do at the end of the function).
///
/// # Panics
///
/// Panics when the download or the cache write fails, when an example shows
/// up before any heading, or when an example lacks its `.` separator line.
async fn commonmark() {
    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt";
    let data_url = "commonmark-data.txt";
    // Only needed once the generated module is written (see the to-do below).
    #[allow(unused_variables)]
    let code_url = "tests/commonmark.rs";

    // Prefer the cached copy; fall back to downloading (and caching) the spec.
    let value = match fs::read_to_string(data_url) {
        Ok(cached) => cached,
        Err(_) => {
            let downloaded = reqwest::get(url)
                .await
                .expect("failed to download the CommonMark spec")
                .text()
                .await
                .expect("failed to read the CommonMark spec response body");

            fs::write(data_url, &downloaded).expect("failed to cache the CommonMark spec");

            downloaded
        }
    };

    // Matches either a whole fenced example (several lines) or a single
    // heading line; the number of lines in a match tells the two apart.
    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)")
        .expect("example/heading pattern is a valid regex");
    let re_heading_prefix =
        Regex::new(r"#{1,6} ").expect("heading prefix pattern is a valid regex");
    // The line holding a lone `.` that separates input from expected output.
    let re_in_out = Regex::new(r"\n\.(?:\n|$)").expect("separator pattern is a valid regex");
    let re_after_tests =
        Regex::new(r"<!-- END TESTS -->[\s\S]*").expect("end marker pattern is a valid regex");

    let value = re_after_tests.replace(&value, "");
    let value = value.replace('→', "\t");

    let mut current_heading: Option<String> = None;
    let mut cases: Vec<String> = vec![];

    for mat in re.find_iter(&value) {
        let lines = mat.as_str().lines().collect::<Vec<_>>();

        if lines.len() == 1 {
            current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string());
        } else {
            let section = current_heading
                .as_ref()
                .expect("found an example before any heading");
            // Strip the opening and closing fence lines.
            let case = lines[1..lines.len() - 1].join("\n");
            let mut parts = re_in_out.split(&case);
            let input = format!("{}\n", parts.next().unwrap_or(""));
            let output = match parts
                .next()
                .expect("example is missing its `.` input/output separator")
            {
                "" => String::new(),
                expected => format!("{}\n", expected),
            };

            // `cases.len()` is the zero-based index of the case being added.
            let test = format!(" assert_eq!(\n micromark_with_options(r###\"{}\"###, DANGER),\n r###\"{}\"###,\n r###\"{} ({})\"###\n);", input, output, section, cases.len());

            cases.push(test);
        }
    }

    // Unused until the write below is enabled.
    #[allow(unused_variables)]
    let doc = format!(
        "//! CommonMark test suite.

// > 👉 **Important**: this module is generated by `build.rs`.
// > It is generated from the CommonMark spec.

extern crate micromark;
use micromark::{{micromark_with_options, Options}};

const DANGER: &Options = &Options {{
 allow_dangerous_html: true,
 allow_dangerous_protocol: true,
 default_line_ending: None,
}};

#[test]
fn commonmark() {{
{}
}}
",
        cases.join("\n\n")
    );

    // To do: enable when CM is completely fixed.
    // fs::write(code_url, doc).unwrap();
}

async fn punctuation() {
    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
    let data_url = "unicode-data.txt";
    let code_url = "src/unicode.rs";