// To do: port to Rust with `reqwest`?
//
// Generates `../src/unicode.rs`: a Rust array with every code point whose
// general category in `UnicodeData.txt` is one of the punctuation categories.
// The Unicode data is read from a local cache next to this script
// (`unicode-data.txt`); when the cache cannot be read it is downloaded and
// written to that cache first.
import fs from "node:fs/promises";

const dataUrl = new URL("unicode-data.txt", import.meta.url);
const codeUrl = new URL("../src/unicode.rs", import.meta.url);

// Upstream location of the Unicode character database file.
const sourceUrl =
  "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";

/** @type {string} */
let data;

try {
  data = String(await fs.readFile(dataUrl));
} catch {
  // The cache could not be read: download the data and cache it.
  const response = await fetch(sourceUrl);

  // Do not cache (or generate code from) an HTTP error page.
  if (!response.ok) {
    throw new Error(
      "Cannot fetch `" +
        sourceUrl +
        "`: " +
        response.status +
        " " +
        response.statusText
    );
  }

  const text = await response.text();
  await fs.writeFile(dataUrl, text);
  data = text;
}

// General categories that count as punctuation.
const search = new Set([
  "Pc", // Punctuation, Connector
  "Pd", // Punctuation, Dash
  "Pe", // Punctuation, Close
  "Pf", // Punctuation, FinalQuote
  "Pi", // Punctuation, InitialQuote
  "Po", // Punctuation, Other
  "Ps", // Punctuation, Open
]);

/** @type {Array<string>} */
const found = [];

for (const row of data.split("\n")) {
  // Rows are `;`-separated: the first cell is the code point (hex), the third
  // is the general category.
  // Rows without a third cell (such as a trailing empty line) have an
  // `undefined` category and are skipped by the lookup.
  const [code, , category] = row.split(";");

  if (search.has(category)) {
    found.push(code);
  }
}

// An empty result means the input was not usable Unicode data; refuse to
// overwrite the generated module with an empty table.
if (found.length === 0) {
  throw new Error(
    "No punctuation found in `" +
      dataUrl.href +
      "`, remove that file to download it again"
  );
}

await fs.writeFile(
  codeUrl,
  [
    "//! Information on Unicode.",
    "",
    "/// List of characters that are considered punctuation according to Unicode.",
    "///",
    "/// > 👉 **Important**: this module is generated by `script/`.",
    "/// > It is generated from the latest Unicode data.",
    "///",
    "/// Rust does not contain an `is_punctuation` method on `char`, while it does",
    "/// support [`is_ascii_punctuation`](char::is_ascii_punctuation).",
    "///",
    "/// `CommonMark` handles attention (emphasis, strong) markers based on what",
    "/// comes before or after them.",
    "/// One such difference is if those characters are Unicode punctuation.",
    "///",
    "/// ## References",
    "///",
    "/// * [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)",
    "pub const PUNCTUATION: [char; " + found.length + "] = [",
    ...found.map((code) => " '\\u{" + code + "}',"),
    "];",
    "",
  ].join("\n")
);