From ff5f81498ba1807ab06ffb5dadb1c99c102e0284 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Tue, 13 Dec 2022 12:57:14 +0400
Subject: Replace build script with private crate

Closes GH-34.
Closes GH-35.
---
 .github/workflows/main.yml |   2 +
 Cargo.toml                 |   7 +-
 build.rs                   | 155 -------------------------------------------
 generate/Cargo.toml        |  11 ++++
 generate/src/main.rs       | 161 +++++++++++++++++++++++++++++++++++++++++++++
 readme.md                  |   4 ++
 src/util/unicode.rs        |   2 +-
 tests/commonmark.rs        |   2 +-
 8 files changed, 182 insertions(+), 162 deletions(-)
 delete mode 100644 build.rs
 create mode 100644 generate/Cargo.toml
 create mode 100644 generate/src/main.rs

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a38c5c5..c439044 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -11,6 +11,7 @@ jobs:
         with:
           toolchain: stable
           components: rustfmt, clippy
+      - run: cargo run --manifest-path generate/Cargo.toml
       - run: cargo fmt --check && cargo clippy --examples --tests --benches
       - run: cargo test
   coverage:
@@ -20,5 +21,6 @@ jobs:
       - uses: actions-rs/toolchain@v1
         with:
           toolchain: stable
+      - run: cargo run --manifest-path generate/Cargo.toml
       - run: cargo install cargo-tarpaulin && cargo tarpaulin --out Xml
       - uses: codecov/codecov-action@v3
diff --git a/Cargo.toml b/Cargo.toml
index cd80209..bd27a80 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,7 +21,6 @@ harness = false
 log = "0.4"
 unicode-id = { version = "0.3", features = ["no_std"] }
 
-
 [dev-dependencies]
 env_logger = "0.10"
 criterion = "0.4"
@@ -33,7 +32,5 @@ swc_core = { version = "0.43.30", features = [
   "common",
 ] }
 
-[build-dependencies]
-regex = "1"
-reqwest = "0.11"
-tokio = { version = "1", features = ["full"] }
+[workspace]
+members = ["generate"]
diff --git a/build.rs b/build.rs
deleted file mode 100644
index 658cb0a..0000000
--- a/build.rs
+++ /dev/null
@@ -1,155 +0,0 @@
-use regex::Regex;
-use std::fs;
-
-#[tokio::main]
-async fn main() {
-    commonmark().await;
-    punctuation().await;
-}
-
-async fn commonmark() {
-    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt";
-    let data_url = "commonmark-data.txt";
-    let code_url = "tests/commonmark.rs";
-
-    let value = if let Ok(value) = fs::read_to_string(data_url) {
-        value
-    } else {
-        let value = reqwest::get(url).await.unwrap().text().await.unwrap();
-
-        fs::write(data_url, value.clone()).unwrap();
-
-        value
-    };
-
-    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
-    let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap();
-    let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();
-    let mut current_heading = None;
-    let mut number = 1;
-
-    let value = Regex::new(r"<!-- END TESTS -->[\s\S]*")
-        .unwrap()
-        .replace(&value, "");
-    let value = Regex::new(r"→").unwrap().replace_all(&value, "\t");
-    let mut cases = vec![];
-
-    for mat in re.find_iter(&value) {
-        let mut lines = mat.as_str().lines().collect::<Vec<_>>();
-
-        if lines.len() == 1 {
-            current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string());
-        } else {
-            lines.remove(0);
-            lines.pop();
-            let section = current_heading.as_ref().unwrap();
-            let case = lines.join("\n");
-            let parts = re_in_out.split(&case).collect::<Vec<_>>();
-            let input = format!("{}\n", parts[0]);
-            let output = if parts[1].is_empty() {
-                "".into()
-            } else {
-                format!("{}\n", parts[1])
-            };
-
-            let test = format!("    assert_eq!(\n        to_html_with_options(\n            r###\"{}\"###,\n            &danger\n        )?,\n        r###\"{}\"###,\n        r###\"{} ({})\"###\n);", input, output, section, number);
-
-            cases.push(test);
-
-            number += 1;
-        }
-    }
-
-    let doc = format!(
-        "//! `CommonMark` test suite.
-
-// > 👉 **Important**: this module is generated by `build.rs`.
-// > It is generate from the latest CommonMark website.
-
-use markdown::{{to_html_with_options, CompileOptions, Options}};
-use pretty_assertions::assert_eq;
-
-#[rustfmt::skip]
-#[test]
-fn commonmark() -> Result<(), String> {{
-    let danger = Options {{
-        compile: CompileOptions {{
-            allow_dangerous_html: true,
-            allow_dangerous_protocol: true,
-            ..CompileOptions::default()
-        }},
-        ..Options::default()
-    }};
-
-{}
-
-    Ok(())
-}}
-",
-        cases.join("\n\n")
-    );
-
-    fs::write(code_url, doc).unwrap();
-}
-
-async fn punctuation() {
-    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
-    let data_url = "unicode-data.txt";
-    let code_url = "src/util/unicode.rs";
-
-    let value = if let Ok(value) = fs::read_to_string(data_url) {
-        value
-    } else {
-        let value = reqwest::get(url).await.unwrap().text().await.unwrap();
-
-        fs::write(data_url, value.clone()).unwrap();
-
-        value
-    };
-
-    let search = [
-        "Pc", // Punctuation, Connector
-        "Pd", // Punctuation, Dash
-        "Pe", // Punctuation, Close
-        "Pf", // Punctuation, FinalQuote
-        "Pi", // Punctuation, InitialQuote
-        "Po", // Punctuation, Other
-        "Ps", // Punctuation, Open
-    ];
-
-    let found = value
-        .lines()
-        .map(|line| line.split(';').collect::<Vec<_>>())
-        .map(|cells| (cells[0], cells[2]))
-        .filter(|c| search.contains(&c.1))
-        .map(|c| c.0)
-        .collect::<Vec<_>>();
-
-    let doc = format!(
-        "//! Info on Unicode.
-
-/// List of characters that are considered punctuation.
-///
-/// > 👉 **Important**: this module is generated by `build.rs`.
-/// > It is generate from the latest Unicode data.
-///
-/// Rust does not contain an `is_punctuation` method on `char`, while it does
-/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric).
-///
-/// `CommonMark` handles attention (emphasis, strong) markers based on what
-/// comes before or after them.
-/// One such difference is if those characters are Unicode punctuation.
-///
-/// ## References
-///
-/// *   [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)
-pub const PUNCTUATION: [char; {}] = [
-{}
-];
-",
-    found.len(),
-    found.iter().map(|d| format!("    '\\u{{{}}}',", d)).collect::<Vec<_>>().join("\n")
-    );
-
-    fs::write(code_url, doc).unwrap();
-}
diff --git a/generate/Cargo.toml b/generate/Cargo.toml
new file mode 100644
index 0000000..7a2cf63
--- /dev/null
+++ b/generate/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "markdown-generate"
+version = "0.0.0"
+authors = ["Titus Wormer <tituswormer@gmail.com>"]
+edition = "2018"
+publish = false
+
+[dependencies]
+regex = "1"
+reqwest = "0.11"
+tokio = { version = "1", features = ["full"] }
diff --git a/generate/src/main.rs b/generate/src/main.rs
new file mode 100644
index 0000000..9f0d14b
--- /dev/null
+++ b/generate/src/main.rs
@@ -0,0 +1,161 @@
+// To regenerate, run the following from the repository root:
+//
+// ```sh
+// cargo run --manifest-path generate/Cargo.toml
+// ```
+
+use regex::Regex;
+use std::fs;
+
+#[tokio::main]
+async fn main() {
+    commonmark().await; // Writes `tests/commonmark.rs`.
+    punctuation().await; // Writes `src/util/unicode.rs`.
+}
+
+/// Regenerates `tests/commonmark.rs` from the `CommonMark` spec; run from the repository root.
+async fn commonmark() {
+    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt";
+    let data_url = "commonmark-data.txt";
+    let code_url = "tests/commonmark.rs";
+    // Reuse the cached spec; otherwise download it, refusing to cache an HTTP error page.
+    let value = if let Ok(value) = fs::read_to_string(data_url) {
+        value
+    } else {
+        let response = reqwest::get(url).await.unwrap();
+        let value = response.error_for_status().unwrap().text().await.unwrap();
+        fs::write(data_url, &value).unwrap();
+        value
+    };
+
+    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
+    let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap();
+    let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();
+    let mut current_heading = None;
+    let mut number = 1;
+    // Ignore everything after the tests, then turn each `→` into a real tab.
+    let value = Regex::new(r"<!-- END TESTS -->[\s\S]*")
+        .unwrap()
+        .replace(&value, "");
+    let value = value.replace('→', "\t");
+    let mut cases = vec![];
+
+    for mat in re.find_iter(&value) {
+        let mut lines = mat.as_str().lines().collect::<Vec<_>>();
+        // A single-line match is a heading; anything longer is a fenced example.
+        if lines.len() == 1 {
+            current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string());
+        } else {
+            lines.remove(0); // Opening fence.
+            lines.pop(); // Closing fence.
+            let section = current_heading.as_ref().unwrap();
+            let case = lines.join("\n");
+            let parts = re_in_out.split(&case).collect::<Vec<_>>();
+            let input = format!("{}\n", parts[0]);
+            let output = if parts[1].is_empty() {
+                "".into()
+            } else {
+                format!("{}\n", parts[1])
+            };
+
+            let test = format!("    assert_eq!(\n        to_html_with_options(\n            r###\"{}\"###,\n            &danger\n        )?,\n        r###\"{}\"###,\n        r###\"{} ({})\"###\n);", input, output, section, number);
+
+            cases.push(test);
+
+            number += 1;
+        }
+    }
+
+    let doc = format!(
+        "//! `CommonMark` test suite.
+
+// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
+// > It is generated from the latest CommonMark website.
+
+use markdown::{{to_html_with_options, CompileOptions, Options}};
+use pretty_assertions::assert_eq;
+
+#[rustfmt::skip]
+#[test]
+fn commonmark() -> Result<(), String> {{
+    let danger = Options {{
+        compile: CompileOptions {{
+            allow_dangerous_html: true,
+            allow_dangerous_protocol: true,
+            ..CompileOptions::default()
+        }},
+        ..Options::default()
+    }};
+
+{}
+
+    Ok(())
+}}
+",
+        cases.join("\n\n")
+    );
+
+    fs::write(code_url, doc).unwrap();
+}
+
+/// Regenerates `src/util/unicode.rs` from `UnicodeData.txt`; run from the repository root.
+async fn punctuation() {
+    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
+    let data_url = "unicode-data.txt";
+    let code_url = "src/util/unicode.rs";
+    // Reuse the cached data; otherwise download it, refusing to cache an HTTP error page.
+    let value = if let Ok(value) = fs::read_to_string(data_url) {
+        value
+    } else {
+        let response = reqwest::get(url).await.unwrap();
+        let value = response.error_for_status().unwrap().text().await.unwrap();
+        fs::write(data_url, &value).unwrap();
+        value
+    };
+
+    let search = [
+        "Pc", // Punctuation, Connector
+        "Pd", // Punctuation, Dash
+        "Pe", // Punctuation, Close
+        "Pf", // Punctuation, FinalQuote
+        "Pi", // Punctuation, InitialQuote
+        "Po", // Punctuation, Other
+        "Ps", // Punctuation, Open
+    ];
+    // Keep the code point (field 0) of every record whose category (field 2) is listed above.
+    let found = value
+        .lines()
+        .map(|line| line.split(';').collect::<Vec<_>>())
+        .map(|cells| (cells[0], cells[2]))
+        .filter(|c| search.contains(&c.1))
+        .map(|c| c.0)
+        .collect::<Vec<_>>();
+
+    let doc = format!(
+        "//! Info on Unicode.
+
+/// List of characters that are considered punctuation.
+///
+/// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
+/// > It is generated from the latest Unicode data.
+///
+/// Rust does not contain an `is_punctuation` method on `char`, while it does
+/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric).
+///
+/// `CommonMark` handles attention (emphasis, strong) markers based on what
+/// comes before or after them.
+/// One such difference is if those characters are Unicode punctuation.
+///
+/// ## References
+///
+/// *   [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)
+pub const PUNCTUATION: [char; {}] = [
+{}
+];
+",
+    found.len(),
+    found.iter().map(|d| format!("    '\\u{{{}}}',", d)).collect::<Vec<_>>().join("\n")
+    );
+
+    fs::write(code_url, doc).unwrap();
+}
diff --git a/readme.md b/readme.md
index 7f39fdf..55091ea 100644
--- a/readme.md
+++ b/readme.md
@@ -241,6 +241,10 @@ Fuzz testing is used to check for things that might fall through coverage.
 
 The following bash scripts are useful when working on this project:
 
+*   generate code (latest CM tests and Unicode info):
+    ```sh
+    cargo run --manifest-path generate/Cargo.toml
+    ```
 *   run examples:
     ```sh
     RUST_BACKTRACE=1 RUST_LOG=debug cargo run --example lib
diff --git a/src/util/unicode.rs b/src/util/unicode.rs
index 15004c7..a8da957 100644
--- a/src/util/unicode.rs
+++ b/src/util/unicode.rs
@@ -2,7 +2,7 @@
 
 /// List of characters that are considered punctuation.
 ///
-/// > 👉 **Important**: this module is generated by `build.rs`.
+/// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
 /// > It is generate from the latest Unicode data.
 ///
 /// Rust does not contain an `is_punctuation` method on `char`, while it does
diff --git a/tests/commonmark.rs b/tests/commonmark.rs
index 30f62ca..9dbc417 100644
--- a/tests/commonmark.rs
+++ b/tests/commonmark.rs
@@ -1,6 +1,6 @@
 //! `CommonMark` test suite.
 
-// > 👉 **Important**: this module is generated by `build.rs`.
+// > 👉 **Important**: this module is generated by `generate/src/main.rs`.
 // > It is generate from the latest CommonMark website.
 
 use markdown::{to_html_with_options, CompileOptions, Options};
-- 
cgit