extern crate reqwest;
use regex::Regex;
use std::fs;
/// Entry point of the generator.
///
/// Regenerates the CommonMark test suite first and the Unicode punctuation
/// table second; the two steps run one after the other.
#[tokio::main]
async fn main() {
    commonmark().await;
    punctuation().await;
}
/// Generates `tests/commonmark.rs` from the CommonMark 0.30 `spec.txt`.
///
/// The spec is taken from the cache file `commonmark-data.txt` when that file
/// can be read; otherwise it is downloaded and the cache file is written.
/// Every fenced `example` block found before the `<!-- END TESTS -->` marker
/// becomes one `assert_eq!` in the generated test, labelled with the most
/// recent heading and a running example number (starting at 1).
///
/// # Panics
///
/// Panics when the download or a file write fails, when an example appears
/// before any heading, or when an example has no `.` line separating its
/// input from its expected output.
async fn commonmark() {
    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt";
    let data_url = "commonmark-data.txt";
    let code_url = "tests/commonmark.rs";

    let value = if let Ok(value) = fs::read_to_string(data_url) {
        value
    } else {
        let value = reqwest::get(url)
            .await
            .expect("failed to download the CommonMark spec")
            .text()
            .await
            .expect("failed to read the CommonMark spec response body");
        // Borrow instead of cloning: the text is still needed below.
        fs::write(data_url, &value).expect("failed to write the CommonMark spec cache file");
        value
    };

    // Matches either a whole fenced example (multiple lines) or a single
    // heading line.
    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
    let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap();
    let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();

    // Everything from the end marker onwards is not part of the test suite.
    let tests_end = value.find("<!-- END TESTS -->").unwrap_or(value.len());
    // The spec writes tabs as U+2192 RIGHTWARDS ARROW; turn them back into
    // real tabs. The escape keeps this line immune to file-encoding damage.
    let value = value[..tests_end].replace('\u{2192}', "\t");

    let mut current_heading: Option<String> = None;
    let mut cases = vec![];

    for mat in re.find_iter(&value) {
        let lines = mat.as_str().lines().collect::<Vec<_>>();

        if lines.len() == 1 {
            // A one-line match is a heading: remember it as the section name.
            current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string());
        } else {
            let number = cases.len() + 1;
            let section = current_heading
                .as_deref()
                .unwrap_or_else(|| panic!("example {} appears before any heading", number));

            // Drop the opening and closing fence lines; an example match
            // always contains both.
            let case = lines[1..lines.len() - 1].join("\n");

            // A line consisting of a single `.` separates input from output.
            let mut parts = re_in_out.split(&case);
            let input = format!("{}\n", parts.next().unwrap_or_default());
            let expected = parts.next().unwrap_or_else(|| {
                panic!(
                    "example {} ({}) has no `.` line separating input from output",
                    number, section
                )
            });
            let output = if expected.is_empty() {
                String::new()
            } else {
                format!("{}\n", expected)
            };

            cases.push(format!(" assert_eq!(\n micromark_with_options(\n r###\"{}\"###,\n &danger\n ),\n r###\"{}\"###,\n r###\"{} ({})\"###\n);", input, output, section, number));
        }
    }

    // The template lines start at column 0 on purpose: leading whitespace
    // inside the literal would end up in the generated file.
    // `\u{1F449}` is the pointing-hand emoji, written as an escape so it
    // survives encoding mishaps in this file.
    let doc = format!(
        "//! CommonMark test suite.
// > \u{1F449} **Important**: this module is generated by `build.rs`.
// > It is generated from the latest CommonMark website.
extern crate micromark;
use micromark::{{micromark_with_options, Options}};
#[rustfmt::skip]
#[test]
fn commonmark() {{
let danger = Options {{
allow_dangerous_html: true,
allow_dangerous_protocol: true,
..Options::default()
}};
{}
}}
",
        cases.join("\n\n")
    );

    fs::write(code_url, doc).expect("failed to write the generated CommonMark test file");
}
/// Generates `src/unicode.rs`, a `PUNCTUATION` array of every code point whose
/// general category in `UnicodeData.txt` is one of the punctuation categories.
///
/// The data is taken from the cache file `unicode-data.txt` when that file can
/// be read; otherwise it is downloaded and the cache file is written.
///
/// # Panics
///
/// Panics when the download or a file write fails.
async fn punctuation() {
    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
    let data_url = "unicode-data.txt";
    let code_url = "src/unicode.rs";

    let value = if let Ok(value) = fs::read_to_string(data_url) {
        value
    } else {
        let value = reqwest::get(url)
            .await
            .expect("failed to download the Unicode data")
            .text()
            .await
            .expect("failed to read the Unicode data response body");
        // Borrow instead of cloning: the text is still needed below.
        fs::write(data_url, &value).expect("failed to write the Unicode data cache file");
        value
    };

    let found = punctuation_code_points(&value);
    let entries = found
        .iter()
        .map(|d| format!(" '\\u{{{}}}',", d))
        .collect::<Vec<_>>()
        .join("\n");

    // The template lines start at column 0 on purpose: leading whitespace
    // inside the literal would end up in the generated file.
    // `\u{1F449}` is the pointing-hand emoji and `\u{a7}` the section sign,
    // written as escapes so they survive encoding mishaps in this file.
    let doc = format!(
        "//! Info on Unicode.
/// List of characters that are considered punctuation.
///
/// > \u{1F449} **Important**: this module is generated by `build.rs`.
/// > It is generated from the latest Unicode data.
///
/// Rust does not contain an `is_punctuation` method on `char`, while it does
/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric).
///
/// `CommonMark` handles attention (emphasis, strong) markers based on what
/// comes before or after them.
/// One such difference is if those characters are Unicode punctuation.
///
/// ## References
///
/// * [*\u{a7} 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)
pub const PUNCTUATION: [char; {}] = [
{}
];
",
        found.len(),
        entries
    );

    fs::write(code_url, doc).expect("failed to write the generated Unicode module");
}

/// Returns the first field (the code point, as written in the data) of every
/// `;`-separated record in `data` whose third field is a punctuation general
/// category.
///
/// Lines with fewer than three fields (for example blank lines) are skipped
/// rather than causing a panic.
fn punctuation_code_points(data: &str) -> Vec<&str> {
    const CATEGORIES: [&str; 7] = [
        "Pc", // Punctuation, Connector
        "Pd", // Punctuation, Dash
        "Pe", // Punctuation, Close
        "Pf", // Punctuation, FinalQuote
        "Pi", // Punctuation, InitialQuote
        "Po", // Punctuation, Other
        "Ps", // Punctuation, Open
    ];

    data.lines()
        .filter_map(|line| {
            let mut cells = line.split(';');
            let code_point = cells.next()?;
            // Skip the second field; the third one is the general category.
            let category = cells.nth(1)?;
            if CATEGORIES.contains(&category) {
                Some(code_point)
            } else {
                None
            }
        })
        .collect()
}