extern crate reqwest; use std::fs; #[tokio::main] async fn main() { let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt"; let data_url = "unicode-data.txt"; let code_url = "src/unicode.rs"; let value = if let Ok(value) = fs::read_to_string(data_url) { value } else { let value = reqwest::get(url) .await .unwrap() .text() .await .unwrap(); fs::write(data_url, value.clone()).unwrap(); value }; let search = [ "Pc", // Punctuation, Connector "Pd", // Punctuation, Dash "Pe", // Punctuation, Close "Pf", // Punctuation, FinalQuote "Pi", // Punctuation, InitialQuote "Po", // Punctuation, Other "Ps", // Punctuation, Open ]; let found = value .lines() .map(|line| line.split(';').collect::>()) .map(|cells| (cells[0], cells[2])) .filter(|c| search.contains(&c.1)) .map(|c| c.0) .collect::>(); let doc = format!( "//! Information on Unicode. /// List of characters that are considered punctuation according to Unicode. /// /// > ๐Ÿ‘‰ **Important**: this module is generated by `script/`. /// > It is generate from the latest Unicode data. /// /// Rust does not contain an `is_punctuation` method on `char`, while it does /// support [`is_ascii_alphanumeric`](char::is_ascii_punctuation). /// /// `CommonMark` handles attention (emphasis, strong) markers based on what /// comes before or after them. /// One such difference is if those characters are Unicode punctuation. /// /// ## References /// /// * [*ยง 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character) pub const PUNCTUATION: [char; {}] = [ {} ]; ", found.len(), found.iter().map(|d| format!(" '\\u{{{}}}',", d)).collect::>().join("\n") ); fs::write(code_url, doc).unwrap(); }