From ff5f81498ba1807ab06ffb5dadb1c99c102e0284 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 13 Dec 2022 12:57:14 +0400 Subject: Replace build script with private crate Closes GH-34. Closes GH-35. --- .github/workflows/main.yml | 2 + Cargo.toml | 7 +- build.rs | 155 ------------------------------------------- generate/Cargo.toml | 11 ++++ generate/src/main.rs | 161 +++++++++++++++++++++++++++++++++++++++++++++ readme.md | 4 ++ src/util/unicode.rs | 2 +- tests/commonmark.rs | 2 +- 8 files changed, 182 insertions(+), 162 deletions(-) delete mode 100644 build.rs create mode 100644 generate/Cargo.toml create mode 100644 generate/src/main.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a38c5c5..c439044 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,6 +11,7 @@ jobs: with: toolchain: stable components: rustfmt, clippy + - run: cargo run --manifest-path generate/Cargo.toml - run: cargo fmt --check && cargo clippy --examples --tests --benches - run: cargo test coverage: @@ -20,5 +21,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: stable + - run: cargo run --manifest-path generate/Cargo.toml - run: cargo install cargo-tarpaulin && cargo tarpaulin --out Xml - uses: codecov/codecov-action@v3 diff --git a/Cargo.toml b/Cargo.toml index cd80209..bd27a80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,6 @@ harness = false log = "0.4" unicode-id = { version = "0.3", features = ["no_std"] } - [dev-dependencies] env_logger = "0.10" criterion = "0.4" @@ -33,7 +32,5 @@ swc_core = { version = "0.43.30", features = [ "common", ] } -[build-dependencies] -regex = "1" -reqwest = "0.11" -tokio = { version = "1", features = ["full"] } +[workspace] +members = ["generate"] diff --git a/build.rs b/build.rs deleted file mode 100644 index 658cb0a..0000000 --- a/build.rs +++ /dev/null @@ -1,155 +0,0 @@ -use regex::Regex; -use std::fs; - -#[tokio::main] -async fn main() { - commonmark().await; - 
punctuation().await; -} - -async fn commonmark() { - let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt"; - let data_url = "commonmark-data.txt"; - let code_url = "tests/commonmark.rs"; - - let value = if let Ok(value) = fs::read_to_string(data_url) { - value - } else { - let value = reqwest::get(url).await.unwrap().text().await.unwrap(); - - fs::write(data_url, value.clone()).unwrap(); - - value - }; - - let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap(); - let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap(); - let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap(); - let mut current_heading = None; - let mut number = 1; - - let value = Regex::new(r"<!-- END TESTS -->[\s\S]*") - .unwrap() - .replace(&value, ""); - let value = Regex::new(r"→").unwrap().replace_all(&value, "\t"); - let mut cases = vec![]; - - for mat in re.find_iter(&value) { - let mut lines = mat.as_str().lines().collect::<Vec<_>>(); - - if lines.len() == 1 { - current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string()); - } else { - lines.remove(0); - lines.pop(); - let section = current_heading.as_ref().unwrap(); - let case = lines.join("\n"); - let parts = re_in_out.split(&case).collect::<Vec<_>>(); - let input = format!("{}\n", parts[0]); - let output = if parts[1].is_empty() { - "".into() - } else { - format!("{}\n", parts[1]) - }; - - let test = format!(" assert_eq!(\n to_html_with_options(\n r###\"{}\"###,\n &danger\n )?,\n r###\"{}\"###,\n r###\"{} ({})\"###\n);", input, output, section, number); - - cases.push(test); - - number += 1; - } - } - - let doc = format!( - "//! `CommonMark` test suite. - -// > 👉 **Important**: this module is generated by `build.rs`. -// > It is generate from the latest CommonMark website. 
- -use markdown::{{to_html_with_options, CompileOptions, Options}}; -use pretty_assertions::assert_eq; - -#[rustfmt::skip] -#[test] -fn commonmark() -> Result<(), String> {{ - let danger = Options {{ - compile: CompileOptions {{ - allow_dangerous_html: true, - allow_dangerous_protocol: true, - ..CompileOptions::default() - }}, - ..Options::default() - }}; - -{} - - Ok(()) -}} -", - cases.join("\n\n") - ); - - fs::write(code_url, doc).unwrap(); -} - -async fn punctuation() { - let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"; - let data_url = "unicode-data.txt"; - let code_url = "src/util/unicode.rs"; - - let value = if let Ok(value) = fs::read_to_string(data_url) { - value - } else { - let value = reqwest::get(url).await.unwrap().text().await.unwrap(); - - fs::write(data_url, value.clone()).unwrap(); - - value - }; - - let search = [ - "Pc", // Punctuation, Connector - "Pd", // Punctuation, Dash - "Pe", // Punctuation, Close - "Pf", // Punctuation, FinalQuote - "Pi", // Punctuation, InitialQuote - "Po", // Punctuation, Other - "Ps", // Punctuation, Open - ]; - - let found = value - .lines() - .map(|line| line.split(';').collect::<Vec<_>>()) - .map(|cells| (cells[0], cells[2])) - .filter(|c| search.contains(&c.1)) - .map(|c| c.0) - .collect::<Vec<_>>(); - - let doc = format!( - "//! Info on Unicode. - -/// List of characters that are considered punctuation. -/// -/// > 👉 **Important**: this module is generated by `build.rs`. -/// > It is generate from the latest Unicode data. -/// -/// Rust does not contain an `is_punctuation` method on `char`, while it does -/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric). -/// -/// `CommonMark` handles attention (emphasis, strong) markers based on what -/// comes before or after them. -/// One such difference is if those characters are Unicode punctuation. 
-/// -/// ## References -/// -/// * [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character) -pub const PUNCTUATION: [char; {}] = [ -{} -]; -", - found.len(), - found.iter().map(|d| format!(" '\\u{{{}}}',", d)).collect::<Vec<_>>().join("\n") - ); - - fs::write(code_url, doc).unwrap(); -} diff --git a/generate/Cargo.toml b/generate/Cargo.toml new file mode 100644 index 0000000..7a2cf63 --- /dev/null +++ b/generate/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "markdown-generate" +version = "0.0.0" +authors = ["Titus Wormer "] +edition = "2018" +publish = false + +[dependencies] +regex = "1" +reqwest = "0.11" +tokio = { version = "1", features = ["full"] } diff --git a/generate/src/main.rs b/generate/src/main.rs new file mode 100644 index 0000000..9f0d14b --- /dev/null +++ b/generate/src/main.rs @@ -0,0 +1,161 @@ +// To regenerate, run the following from the repository root: +// +// ```sh +// cargo run --manifest-path generate/Cargo.toml +// ``` + +use regex::Regex; +use std::fs; + +#[tokio::main] +async fn main() { + commonmark().await; + punctuation().await; +} + +async fn commonmark() { + let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt"; + let data_url = "commonmark-data.txt"; + let code_url = "tests/commonmark.rs"; + + let value = if let Ok(value) = fs::read_to_string(data_url) { + value + } else { + let value = reqwest::get(url).await.unwrap().text().await.unwrap(); + + fs::write(data_url, value.clone()).unwrap(); + + value + }; + + let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap(); + let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap(); + let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap(); + let mut current_heading = None; + let mut number = 1; + + let value = Regex::new(r"<!-- END TESTS -->[\s\S]*") + .unwrap() + .replace(&value, ""); + let value = Regex::new(r"→").unwrap().replace_all(&value, "\t"); + let mut cases = vec![]; + + for mat in 
re.find_iter(&value) { + let mut lines = mat.as_str().lines().collect::<Vec<_>>(); + + if lines.len() == 1 { + current_heading = Some(re_heading_prefix.replace(lines[0], "").to_string()); + } else { + lines.remove(0); + lines.pop(); + let section = current_heading.as_ref().unwrap(); + let case = lines.join("\n"); + let parts = re_in_out.split(&case).collect::<Vec<_>>(); + let input = format!("{}\n", parts[0]); + let output = if parts[1].is_empty() { + "".into() + } else { + format!("{}\n", parts[1]) + }; + + let test = format!(" assert_eq!(\n to_html_with_options(\n r###\"{}\"###,\n &danger\n )?,\n r###\"{}\"###,\n r###\"{} ({})\"###\n);", input, output, section, number); + + cases.push(test); + + number += 1; + } + } + + let doc = format!( + "//! `CommonMark` test suite. + +// > 👉 **Important**: this module is generated by `generate/src/main.rs`. +// > It is generate from the latest CommonMark website. + +use markdown::{{to_html_with_options, CompileOptions, Options}}; +use pretty_assertions::assert_eq; + +#[rustfmt::skip] +#[test] +fn commonmark() -> Result<(), String> {{ + let danger = Options {{ + compile: CompileOptions {{ + allow_dangerous_html: true, + allow_dangerous_protocol: true, + ..CompileOptions::default() + }}, + ..Options::default() + }}; + +{} + + Ok(()) +}} +", + cases.join("\n\n") + ); + + fs::write(code_url, doc).unwrap(); +} + +async fn punctuation() { + let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"; + let data_url = "unicode-data.txt"; + let code_url = "src/util/unicode.rs"; + + let value = if let Ok(value) = fs::read_to_string(data_url) { + value + } else { + let value = reqwest::get(url).await.unwrap().text().await.unwrap(); + + fs::write(data_url, value.clone()).unwrap(); + + value + }; + + let search = [ + "Pc", // Punctuation, Connector + "Pd", // Punctuation, Dash + "Pe", // Punctuation, Close + "Pf", // Punctuation, FinalQuote + "Pi", // Punctuation, InitialQuote + "Po", // Punctuation, Other + "Ps", // Punctuation, 
Open + ]; + + let found = value + .lines() + .map(|line| line.split(';').collect::<Vec<_>>()) + .map(|cells| (cells[0], cells[2])) + .filter(|c| search.contains(&c.1)) + .map(|c| c.0) + .collect::<Vec<_>>(); + + let doc = format!( + "//! Info on Unicode. + +/// List of characters that are considered punctuation. +/// +/// > 👉 **Important**: this module is generated by `generate/src/main.rs`. +/// > It is generate from the latest Unicode data. +/// +/// Rust does not contain an `is_punctuation` method on `char`, while it does +/// support [`is_ascii_alphanumeric`](char::is_ascii_alphanumeric). +/// +/// `CommonMark` handles attention (emphasis, strong) markers based on what +/// comes before or after them. +/// One such difference is if those characters are Unicode punctuation. +/// +/// ## References +/// +/// * [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character) +pub const PUNCTUATION: [char; {}] = [ +{} +]; +", + found.len(), + found.iter().map(|d| format!(" '\\u{{{}}}',", d)).collect::<Vec<_>>().join("\n") + ); + + fs::write(code_url, doc).unwrap(); +} diff --git a/readme.md b/readme.md index 7f39fdf..55091ea 100644 --- a/readme.md +++ b/readme.md @@ -241,6 +241,10 @@ Fuzz testing is used to check for things that might fall through coverage. The following bash scripts are useful when working on this project: +* generate code (latest CM tests and Unicode info): + ```sh + cargo run --manifest-path generate/Cargo.toml + ``` * run examples: ```sh RUST_BACKTRACE=1 RUST_LOG=debug cargo run --example lib diff --git a/src/util/unicode.rs b/src/util/unicode.rs index 15004c7..a8da957 100644 --- a/src/util/unicode.rs +++ b/src/util/unicode.rs @@ -2,7 +2,7 @@ /// List of characters that are considered punctuation. /// -/// > 👉 **Important**: this module is generated by `build.rs`. +/// > 👉 **Important**: this module is generated by `generate/src/main.rs`. /// > It is generate from the latest Unicode data. 
/// /// Rust does not contain an `is_punctuation` method on `char`, while it does diff --git a/tests/commonmark.rs b/tests/commonmark.rs index 30f62ca..9dbc417 100644 --- a/tests/commonmark.rs +++ b/tests/commonmark.rs @@ -1,6 +1,6 @@ //! `CommonMark` test suite. -// > 👉 **Important**: this module is generated by `build.rs`. +// > 👉 **Important**: this module is generated by `generate/src/main.rs`. // > It is generate from the latest CommonMark website. use markdown::{to_html_with_options, CompileOptions, Options}; -- cgit