aboutsummaryrefslogtreecommitdiffstats
path: root/script/generate-unicode.js
diff options
context:
space:
mode:
author Titus Wormer <tituswormer@gmail.com> 2022-07-04 17:28:11 +0200
committer Titus Wormer <tituswormer@gmail.com> 2022-07-04 17:28:11 +0200
commit 2bd50cd8082e686ee74adc0770cc63593b1718f1 (patch)
tree 0d2c7ddd3df290c2f89e4726f9bb6264e68a376b /script/generate-unicode.js
parent 0450e7c2b12bd3ef53e0cffb60a3dd860325b478 (diff)
download markdown-rs-2bd50cd8082e686ee74adc0770cc63593b1718f1.tar.gz
markdown-rs-2bd50cd8082e686ee74adc0770cc63593b1718f1.tar.bz2
markdown-rs-2bd50cd8082e686ee74adc0770cc63593b1718f1.zip
Use Rust to crawl unicode
Diffstat (limited to '')
-rw-r--r-- script/generate-unicode.js 68
1 files changed, 0 insertions, 68 deletions
diff --git a/script/generate-unicode.js b/script/generate-unicode.js
deleted file mode 100644
index 35150af..0000000
--- a/script/generate-unicode.js
+++ /dev/null
@@ -1,68 +0,0 @@
-// To do: port to Rust with `reqwest`?
-import fs from "node:fs/promises";
-
-const dataUrl = new URL("unicode-data.txt", import.meta.url);
-const codeUrl = new URL("../src/unicode.rs", import.meta.url);
-/** @type {string} */
-let data;
-
-try {
- data = String(await fs.readFile(dataUrl));
-} catch {
- const response = await fetch(
- "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"
- );
- console.log(response);
- const text = await response.text();
- await fs.writeFile(dataUrl, text);
- data = text;
-}
-
-let rows = data.split("\n");
-let index = -1;
-let search = [
- "Pc", // Punctuation, Connector
- "Pd", // Punctuation, Dash
- "Pe", // Punctuation, Close
- "Pf", // Punctuation, FinalQuote
- "Pi", // Punctuation, InitialQuote
- "Po", // Punctuation, Other
- "Ps", // Punctuation, Open
-];
-/** @type {Array<string>} */
-let found = [];
-
-while (++index < rows.length) {
- const cells = rows[index].split(";");
- const [code, , category] = cells;
- if (search.includes(category)) {
- found.push(code);
- }
-}
-
-await fs.writeFile(
- codeUrl,
- [
- "//! Information on Unicode.",
- "",
- "/// List of characters that are considered punctuation according to Unicode.",
- "///",
- "/// > 👉 **Important**: this module is generated by `script/`.",
- "/// > It is generate from the latest Unicode data.",
- "///",
- "/// Rust does not contain an `is_punctuation` method on `char`, while it does",
- "/// support [`is_ascii_alphanumeric`](char::is_ascii_punctuation).",
- "///",
- "/// `CommonMark` handles attention (emphasis, strong) markers based on what",
- "/// comes before or after them.",
- "/// One such difference is if those characters are Unicode punctuation.",
- "///",
- "/// ## References",
- "///",
- "/// * [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)",
- "pub const PUNCTUATION: [char; " + found.length + "] = [",
- ...found.map((d) => " '\\u{" + d + "}',"),
- "];",
- "",
- ].join("\n")
-);