From ff5f81498ba1807ab06ffb5dadb1c99c102e0284 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Tue, 13 Dec 2022 12:57:14 +0400
Subject: Replace build script with private crate

Closes GH-34.
Closes GH-35.
---
 generate/src/main.rs | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 generate/src/main.rs

(limited to 'generate/src/main.rs')

diff --git a/generate/src/main.rs b/generate/src/main.rs
new file mode 100644
index 0000000..9f0d14b
--- /dev/null
+++ b/generate/src/main.rs
@@ -0,0 +1,161 @@
+// To regenerate, run the following from the repository root:
+//
+// ```sh
+// cargo run --manifest-path generate/Cargo.toml
+// ```
+
+use regex::Regex;
+use std::fs;
+
+#[tokio::main]
+async fn main() {
+    commonmark().await; // Writes `tests/commonmark.rs` (caches `commonmark-data.txt`).
+    punctuation().await; // Writes `src/util/unicode.rs` (caches `unicode-data.txt`).
+}
+
+/// Generate `tests/commonmark.rs` from the examples in the `CommonMark` spec.
+///
+/// Reads the spec from `commonmark-data.txt`, downloading and caching it there
+/// if that file cannot be read. Panics if downloading, writing, or parsing fails.
+async fn commonmark() {
+    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt";
+    let data_url = "commonmark-data.txt";
+    let code_url = "tests/commonmark.rs";
+
+    let value = if let Ok(value) = fs::read_to_string(data_url) {
+        value
+    } else {
+        // Fail on HTTP errors so that an error page is never cached as the spec.
+        let response = reqwest::get(url).await.unwrap().error_for_status().unwrap();
+        let value = response.text().await.unwrap();
+        fs::write(data_url, &value).unwrap();
+        value
+    };
+
+    // Matches either a whole fenced example or a single heading line.
+    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
+    let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap();
+    let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();
+    let mut current_heading = None;
+    let mut cases = vec![];
+
+    // Keep only the part before the end-of-tests marker, then turn `→` into real tabs.
+    let value = value.split("<!-- END TESTS -->").next().unwrap_or_default();
+    let value = value.replace('→', "\t");
+
+    for mat in re.find_iter(&value) {
+        let mut lines = mat.as_str().lines().collect::<Vec<_>>();
+
+        if lines.len() == 1 {
+            current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string());
+        } else {
+            // Strip the opening and closing fences.
+            lines.remove(0);
+            lines.pop();
+            let section = current_heading.as_ref().expect("example before any heading");
+            let case = lines.join("\n");
+            // A line holding only `.` separates the input from the expected output.
+            let mut parts = re_in_out.split(&case);
+            let input = format!("{}\n", parts.next().unwrap_or_default());
+            let output = match parts.next().expect("example without a `.` separator") {
+                "" => String::new(),
+                html => format!("{}\n", html),
+            };
+            let test = format!("    assert_eq!(\n        to_html_with_options(\n            r###\"{}\"###,\n            &danger\n        )?,\n        r###\"{}\"###,\n        r###\"{} ({})\"###\n);", input, output, section, cases.len() + 1);
+            cases.push(test);
+        }
+    }
+
+    let doc = format!(
+        "//! `CommonMark` test suite.
+
+// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
+// > It is generated from the latest CommonMark website.
+
+use markdown::{{to_html_with_options, CompileOptions, Options}};
+use pretty_assertions::assert_eq;
+
+#[rustfmt::skip]
+#[test]
+fn commonmark() -> Result<(), String> {{
+    let danger = Options {{
+        compile: CompileOptions {{
+            allow_dangerous_html: true,
+            allow_dangerous_protocol: true,
+            ..CompileOptions::default()
+        }},
+        ..Options::default()
+    }};
+
+{}
+
+    Ok(())
+}}
+",
+        cases.join("\n\n")
+    );
+
+    fs::write(code_url, doc).unwrap();
+}
+
+/// Generate `src/util/unicode.rs`: the list of Unicode punctuation characters.
+///
+/// Reads `unicode-data.txt`, downloading and caching it there if that file
+/// cannot be read. Panics if downloading or writing fails.
+async fn punctuation() {
+    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
+    let data_url = "unicode-data.txt";
+    let code_url = "src/util/unicode.rs";
+
+    let value = if let Ok(value) = fs::read_to_string(data_url) {
+        value
+    } else {
+        // Fail on HTTP errors so that an error page is never cached as the data.
+        let response = reqwest::get(url).await.unwrap().error_for_status().unwrap();
+        let value = response.text().await.unwrap();
+        fs::write(data_url, &value).unwrap();
+        value
+    };
+
+    // Punctuation categories, in order: Connector, Dash, Close, FinalQuote,
+    // InitialQuote, Other, Open.
+    let search = ["Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"];
+
+    // Cells are separated by `;`: the first is written out as the code point,
+    // the third is matched against `search`. Lines with fewer cells are skipped.
+    let found = value
+        .lines()
+        .map(|line| line.split(';'))
+        .filter_map(|mut cells| Some((cells.next()?, cells.nth(1)?)))
+        .filter(|c| search.contains(&c.1))
+        .map(|c| c.0)
+        .collect::<Vec<_>>();
+
+    let doc = format!(
+        "//! Info on Unicode.
+
+/// List of characters that are considered punctuation.
+///
+/// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
+/// > It is generated from the latest Unicode data.
+///
+/// Rust does not contain an `is_punctuation` method on `char`, while it does
+/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric).
+///
+/// `CommonMark` handles attention (emphasis, strong) markers based on what
+/// comes before or after them.
+/// One such difference is if those characters are Unicode punctuation.
+///
+/// ## References
+///
+/// *   [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)
+pub const PUNCTUATION: [char; {}] = [
+{}
+];
+",
+        found.len(),
+        found.iter().map(|d| format!("    '\\u{{{}}}',", d)).collect::<Vec<_>>().join("\n")
+    );
+
+    fs::write(code_url, doc).unwrap();
+}
-- 
cgit