aboutsummaryrefslogtreecommitdiffstats
path: root/generate/src/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'generate/src/main.rs')
-rw-r--r--generate/src/main.rs161
1 files changed, 161 insertions, 0 deletions
diff --git a/generate/src/main.rs b/generate/src/main.rs
new file mode 100644
index 0000000..9f0d14b
--- /dev/null
+++ b/generate/src/main.rs
@@ -0,0 +1,161 @@
+// To regenerate, run the following from the repository root:
+//
+// ```sh
+// cargo run --manifest-path generate/Cargo.toml
+// ```
+
+use regex::Regex;
+use std::fs;
+
+#[tokio::main]
+async fn main() { // Runs both generators one after the other; each is awaited before the next starts.
+ commonmark().await; // Writes `tests/commonmark.rs` from the examples in the CommonMark spec text.
+ punctuation().await; // Writes `src/util/unicode.rs` from the Unicode character data file.
+}
+
+/// Regenerate `tests/commonmark.rs` from the examples in the `CommonMark` spec.
+///
+/// The spec text is read from the local cache file `commonmark-data.txt`; when
+/// that file cannot be read, the spec is downloaded and written to the cache.
+/// Every fenced `example` block found before the `<!-- END TESTS -->` marker
+/// becomes one `assert_eq!` in the generated test function, labelled with the
+/// most recent heading and a running example number (starting at 1).
+///
+/// # Panics
+///
+/// Panics if the download fails (including a non-success HTTP status), if a
+/// file cannot be written, if an example appears before any heading, or if an
+/// example lacks the `.` line that separates input from expected output.
+async fn commonmark() {
+    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt";
+    let data_url = "commonmark-data.txt";
+    let code_url = "tests/commonmark.rs";
+
+    let value = match fs::read_to_string(data_url) {
+        Ok(cached) => cached,
+        Err(_) => {
+            // Reject HTTP error statuses so an error page is never cached as the spec.
+            let fetched = reqwest::get(url)
+                .await
+                .and_then(|response| response.error_for_status())
+                .expect("failed to download the CommonMark spec")
+                .text()
+                .await
+                .expect("failed to read the CommonMark spec response body");
+
+            fs::write(data_url, &fetched).expect("failed to write the CommonMark spec cache");
+
+            fetched
+        }
+    };
+
+    // Matches either a whole fenced example (capture group 1 is absent) or a
+    // heading line (capture group 1 holds the heading text).
+    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
+    // A line consisting of a single `.` separates the input from the expected output.
+    let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();
+
+    // Everything from the end-of-tests marker onwards is dropped.
+    let value = value
+        .split_once("<!-- END TESTS -->")
+        .map_or(value.as_str(), |(tests, _)| tests);
+    // The spec writes tabs as U+2192 RIGHTWARDS ARROW; turn them back into real
+    // tabs. The escape keeps this line immune to source re-encoding.
+    let value = value.replace('\u{2192}', "\t");
+
+    let mut current_heading: Option<String> = None;
+    let mut cases = vec![];
+
+    for captures in re.captures_iter(&value) {
+        if let Some(heading) = captures.get(1) {
+            current_heading = Some(heading.as_str().to_string());
+            continue;
+        }
+
+        let section = current_heading
+            .as_ref()
+            .expect("spec example found before any heading");
+
+        // Drop the opening and closing fence lines, keeping only the example body.
+        let lines = captures[0].lines().collect::<Vec<_>>();
+        let case = lines[1..lines.len() - 1].join("\n");
+
+        let mut parts = re_in_out.split(&case);
+        let input = format!("{}\n", parts.next().unwrap_or(""));
+        let expected = parts
+            .next()
+            .expect("spec example is missing the `.` input/output separator");
+        let output = if expected.is_empty() {
+            String::new()
+        } else {
+            format!("{}\n", expected)
+        };
+        // Examples are numbered from 1 in the order they appear in the spec.
+        let number = cases.len() + 1;
+
+        cases.push(format!(
+            "    assert_eq!(\n        to_html_with_options(\n            r###\"{}\"###,\n            &danger\n        )?,\n        r###\"{}\"###,\n        r###\"{} ({})\"###\n    );",
+            input, output, section, number
+        ));
+    }
+
+    let doc = format!(
+        "//! `CommonMark` test suite.
+
+// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
+// > It is generate from the latest CommonMark website.
+
+use markdown::{{to_html_with_options, CompileOptions, Options}};
+use pretty_assertions::assert_eq;
+
+#[rustfmt::skip]
+#[test]
+fn commonmark() -> Result<(), String> {{
+    let danger = Options {{
+        compile: CompileOptions {{
+            allow_dangerous_html: true,
+            allow_dangerous_protocol: true,
+            ..CompileOptions::default()
+        }},
+        ..Options::default()
+    }};
+
+{}
+
+    Ok(())
+}}
+",
+        cases.join("\n\n")
+    );
+
+    fs::write(code_url, doc).expect("failed to write the generated CommonMark test file");
+}
+
+/// Regenerate `src/util/unicode.rs` with the list of Unicode punctuation characters.
+///
+/// The character data is read from the local cache file `unicode-data.txt`;
+/// when that file cannot be read, it is downloaded and written to the cache.
+/// Each line is treated as `;`-separated cells: the first cell is emitted as
+/// the hexadecimal code point and the third cell is compared against the
+/// punctuation categories listed in `search`. Lines with fewer than three
+/// cells are skipped.
+///
+/// # Panics
+///
+/// Panics if the download fails (including a non-success HTTP status) or if a
+/// file cannot be written.
+async fn punctuation() {
+    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
+    let data_url = "unicode-data.txt";
+    let code_url = "src/util/unicode.rs";
+
+    let value = match fs::read_to_string(data_url) {
+        Ok(cached) => cached,
+        Err(_) => {
+            // Reject HTTP error statuses so an error page is never cached as the data file.
+            let fetched = reqwest::get(url)
+                .await
+                .and_then(|response| response.error_for_status())
+                .expect("failed to download the Unicode data")
+                .text()
+                .await
+                .expect("failed to read the Unicode data response body");
+
+            fs::write(data_url, &fetched).expect("failed to write the Unicode data cache");
+
+            fetched
+        }
+    };
+
+    let search = [
+        "Pc", // Punctuation, Connector
+        "Pd", // Punctuation, Dash
+        "Pe", // Punctuation, Close
+        "Pf", // Punctuation, FinalQuote
+        "Pi", // Punctuation, InitialQuote
+        "Po", // Punctuation, Other
+        "Ps", // Punctuation, Open
+    ];
+
+    let found = value
+        .lines()
+        .filter_map(|line| {
+            // Cell 0 is the code point, cell 2 the category; `?` skips blank
+            // or truncated lines instead of panicking on a missing cell.
+            let mut cells = line.split(';');
+            let code = cells.next()?;
+            let category = cells.nth(1)?;
+
+            if search.contains(&category) {
+                Some(code)
+            } else {
+                None
+            }
+        })
+        .collect::<Vec<_>>();
+
+    let doc = format!(
+        "//! Info on Unicode.
+
+/// List of characters that are considered punctuation.
+///
+/// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
+/// > It is generate from the latest Unicode data.
+///
+/// Rust does not contain an `is_punctuation` method on `char`, while it does
+/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric).
+///
+/// `CommonMark` handles attention (emphasis, strong) markers based on what
+/// comes before or after them.
+/// One such difference is if those characters are Unicode punctuation.
+///
+/// ## References
+///
+/// * [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)
+pub const PUNCTUATION: [char; {}] = [
+{}
+];
+",
+        found.len(),
+        found
+            .iter()
+            .map(|d| format!("    '\\u{{{}}}',", d))
+            .collect::<Vec<_>>()
+            .join("\n")
+    );
+
+    fs::write(code_url, doc).expect("failed to write the generated Unicode module");
+}