path: root/build.rs

                     
                 
extern crate reqwest;
use regex::Regex;
use std::fs;

/// Build script entry point.
///
/// Runs the two generators one after the other: `commonmark` is awaited to
/// completion first, then `punctuation`.
#[tokio::main]
async fn main() {
    commonmark().await;
    punctuation().await;
}

/// Build the source of a test module from the examples in the `CommonMark` spec.
///
/// The spec text is read from the cache file `commonmark-data.txt` when that
/// file can be read; otherwise it is downloaded and the cache file is written.
/// Headings and fenced examples are then scanned in document order, and every
/// example becomes one `assert_eq!` labelled with the heading that precedes it
/// and its zero-based index.
///
/// The resulting module source is currently only built in memory: the final
/// write to `tests/commonmark.rs` is disabled (see the note at the end).
///
/// # Panics
///
/// Panics if the download fails or answers with an HTTP error status, if the
/// cache file cannot be written, if an example appears before any heading, or
/// if an example has no `.` line separating its input from its output.
async fn commonmark() {
    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt";
    let data_url = "commonmark-data.txt";
    // Only referenced by the disabled `fs::write` at the end of this function.
    #[allow(unused_variables)]
    let code_url = "tests/commonmark.rs";

    let value = match fs::read_to_string(data_url) {
        Ok(value) => value,
        // Any read failure is treated as a cache miss.
        Err(_) => {
            let value = reqwest::get(url)
                .await
                // Reject 4xx/5xx answers so that an error page is never cached as the spec.
                .and_then(|response| response.error_for_status())
                .expect("failed to download the CommonMark spec")
                .text()
                .await
                .expect("failed to read the CommonMark spec response");

            // Borrow the text: there is no need to clone it just to write the cache.
            fs::write(data_url, &value).expect("failed to write the CommonMark spec cache");

            value
        }
    };

    // Matches either a whole fenced example (several lines) or a heading (one line).
    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
    let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap();
    // A line consisting of a single `.` separates the input from the output.
    let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();

    // Drop everything from the first end-of-tests marker onward.
    let value = match value.find("<!-- END TESTS -->") {
        Some(end) => &value[..end],
        None => value.as_str(),
    };
    // Turn every `→` into a real tab character.
    let value = value.replace('→', "\t");

    let mut current_heading: Option<String> = None;
    let mut cases: Vec<String> = vec![];

    for mat in re.find_iter(&value) {
        let lines = mat.as_str().lines().collect::<Vec<_>>();

        // A single-line match is a heading: remember it for the examples that follow.
        if lines.len() == 1 {
            current_heading = Some(re_heading_prefix.replace(lines[0], "").into_owned());
            continue;
        }

        let section = current_heading
            .as_ref()
            .expect("found an example before any heading");
        // Strip the opening and closing fence lines.
        let case = lines[1..lines.len() - 1].join("\n");
        // Split at the first separator only, so that a lone `.` line inside the
        // expected output does not truncate it.
        let mut parts = re_in_out.splitn(&case, 2);
        let input = format!("{}\n", parts.next().unwrap_or(""));
        let output = match parts.next() {
            Some("") => String::new(),
            Some(output) => format!("{}\n", output),
            None => panic!(
                "example {} ({}) has no `.` separator",
                cases.len(),
                section
            ),
        };

        // The number of cases collected so far is this example's zero-based index.
        let test = format!("    assert_eq!(\n        micromark_with_options(r###\"{}\"###, DANGER),\n        r###\"{}\"###,\n        r###\"{} ({})\"###\n);", input, output, section, cases.len());

        cases.push(test);
    }

    // Only referenced by the disabled `fs::write` below.
    #[allow(unused_variables)]
    let doc = format!(
        "//! CommonMark test suite.

// > 👉 **Important**: this module is generated by `build.rs`.
// > It is generated from the `CommonMark` spec.

extern crate micromark;
use micromark::{{micromark_with_options, Options}};

const DANGER: &Options = &Options {{
    allow_dangerous_html: true,
    allow_dangerous_protocol: true,
    default_line_ending: None,
}};

#[test]
fn commonmark() {{
{}
}}
",
        cases.join("\n\n")
    );

    // To do: enable when CM is completely fixed.
    // fs::write(code_url, doc).unwrap();
}

/// Generate `src/unicode.rs`, a module holding the list of Unicode punctuation characters.
///
/// The Unicode data is read from the cache file `unicode-data.txt` when that
/// file can be read; otherwise it is downloaded and the cache file is written.
/// Each non-blank line is split on `;`: the first field is taken as the code
/// point and the third as the category. Code points whose category is one of
/// the `P*` values listed in `search` are emitted, in file order, as `char`
/// literals of a `PUNCTUATION` array.
///
/// # Panics
///
/// Panics if the download fails or answers with an HTTP error status, if a
/// non-blank line has fewer than three fields, or if the cache file or the
/// generated module cannot be written.
async fn punctuation() {
    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
    let data_url = "unicode-data.txt";
    let code_url = "src/unicode.rs";

    let value = match fs::read_to_string(data_url) {
        Ok(value) => value,
        // Any read failure is treated as a cache miss.
        Err(_) => {
            let value = reqwest::get(url)
                .await
                // Reject 4xx/5xx answers so that an error page is never cached as data.
                .and_then(|response| response.error_for_status())
                .expect("failed to download the Unicode data")
                .text()
                .await
                .expect("failed to read the Unicode data response");

            // Borrow the text: there is no need to clone it just to write the cache.
            fs::write(data_url, &value).expect("failed to write the Unicode data cache");

            value
        }
    };

    let search = [
        "Pc", // Punctuation, Connector
        "Pd", // Punctuation, Dash
        "Pe", // Punctuation, Close
        "Pf", // Punctuation, FinalQuote
        "Pi", // Punctuation, InitialQuote
        "Po", // Punctuation, Other
        "Ps", // Punctuation, Open
    ];

    let found = value
        .lines()
        // Tolerate blank lines instead of failing on them.
        .filter(|line| !line.trim().is_empty())
        .map(|line| {
            let mut cells = line.split(';');
            // `split` always yields at least one item.
            let code_point = cells.next().unwrap_or("");
            // Skip the second field; the third one is the category.
            let category = cells
                .nth(1)
                .unwrap_or_else(|| panic!("malformed record in `{}`: {:?}", data_url, line));
            (code_point, category)
        })
        .filter(|(_, category)| search.contains(category))
        .map(|(code_point, _)| code_point)
        .collect::<Vec<_>>();

    let doc = format!(
        "//! Information on Unicode.

/// List of characters that are considered punctuation according to Unicode.
///
/// > 👉 **Important**: this module is generated by `build.rs`.
/// > It is generated from the latest Unicode data.
///
/// Rust does not contain an `is_punctuation` method on `char`, while it does
/// support [`is_ascii_punctuation`](char::is_ascii_punctuation).
///
/// `CommonMark` handles attention (emphasis, strong) markers based on what
/// comes before or after them.
/// One such difference is if those characters are Unicode punctuation.
///
/// ## References
///
/// *   [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)
pub const PUNCTUATION: [char; {}] = [
{}
];
",
        found.len(),
        found
            .iter()
            .map(|d| format!("    '\\u{{{}}}',", d))
            .collect::<Vec<_>>()
            .join("\n")
    );

    fs::write(code_url, doc).expect("failed to write the generated Unicode module");
}
extern crate reqwest;
use regex::Regex;
use std::fs;

/// Build script entry point.
///
/// Runs the two generators one after the other: `commonmark` is awaited to
/// completion first, then `punctuation`.
#[tokio::main]
async fn main() {
    commonmark().await;
    punctuation().await;
}

/// Build the source of a test module from the examples in the `CommonMark` spec.
///
/// The spec text is read from the cache file `commonmark-data.txt` when that
/// file can be read; otherwise it is downloaded and the cache file is written.
/// Headings and fenced examples are then scanned in document order, and every
/// example becomes one `assert_eq!` labelled with the heading that precedes it
/// and its zero-based index.
///
/// The resulting module source is currently only built in memory: the final
/// write to `tests/commonmark.rs` is disabled (see the note at the end).
///
/// # Panics
///
/// Panics if the download fails or answers with an HTTP error status, if the
/// cache file cannot be written, if an example appears before any heading, or
/// if an example has no `.` line separating its input from its output.
async fn commonmark() {
    let url = "https://raw.githubusercontent.com/commonmark/commonmark-spec/0.30/spec.txt";
    let data_url = "commonmark-data.txt";
    // Only referenced by the disabled `fs::write` at the end of this function.
    #[allow(unused_variables)]
    let code_url = "tests/commonmark.rs";

    let value = match fs::read_to_string(data_url) {
        Ok(value) => value,
        // Any read failure is treated as a cache miss.
        Err(_) => {
            let value = reqwest::get(url)
                .await
                // Reject 4xx/5xx answers so that an error page is never cached as the spec.
                .and_then(|response| response.error_for_status())
                .expect("failed to download the CommonMark spec")
                .text()
                .await
                .expect("failed to read the CommonMark spec response");

            // Borrow the text: there is no need to clone it just to write the cache.
            fs::write(data_url, &value).expect("failed to write the CommonMark spec cache");

            value
        }
    };

    // Matches either a whole fenced example (several lines) or a heading (one line).
    let re = Regex::new(r"(?m)(?:^`{32} example\n[\s\S]*?\n`{32}$|^#{1,6} *(.*)$)").unwrap();
    let re_heading_prefix = Regex::new(r"#{1,6} ").unwrap();
    // A line consisting of a single `.` separates the input from the output.
    let re_in_out = Regex::new(r"\n\.(?:\n|$)").unwrap();

    // Drop everything from the first end-of-tests marker onward.
    let value = match value.find("<!-- END TESTS -->") {
        Some(end) => &value[..end],
        None => value.as_str(),
    };
    // Turn every `→` into a real tab character.
    let value = value.replace('→', "\t");

    let mut current_heading: Option<String> = None;
    let mut cases: Vec<String> = vec![];

    for mat in re.find_iter(&value) {
        let lines = mat.as_str().lines().collect::<Vec<_>>();

        // A single-line match is a heading: remember it for the examples that follow.
        if lines.len() == 1 {
            current_heading = Some(re_heading_prefix.replace(lines[0], "").into_owned());
            continue;
        }

        let section = current_heading
            .as_ref()
            .expect("found an example before any heading");
        // Strip the opening and closing fence lines.
        let case = lines[1..lines.len() - 1].join("\n");
        // Split at the first separator only, so that a lone `.` line inside the
        // expected output does not truncate it.
        let mut parts = re_in_out.splitn(&case, 2);
        let input = format!("{}\n", parts.next().unwrap_or(""));
        let output = match parts.next() {
            Some("") => String::new(),
            Some(output) => format!("{}\n", output),
            None => panic!(
                "example {} ({}) has no `.` separator",
                cases.len(),
                section
            ),
        };

        // The number of cases collected so far is this example's zero-based index.
        let test = format!("    assert_eq!(\n        micromark_with_options(r###\"{}\"###, DANGER),\n        r###\"{}\"###,\n        r###\"{} ({})\"###\n);", input, output, section, cases.len());

        cases.push(test);
    }

    // Only referenced by the disabled `fs::write` below.
    #[allow(unused_variables)]
    let doc = format!(
        "//! CommonMark test suite.

// > 👉 **Important**: this module is generated by `build.rs`.
// > It is generated from the `CommonMark` spec.

extern crate micromark;
use micromark::{{micromark_with_options, Options}};

const DANGER: &Options = &Options {{
    allow_dangerous_html: true,
    allow_dangerous_protocol: true,
    default_line_ending: None,
}};

#[test]
fn commonmark() {{
{}
}}
",
        cases.join("\n\n")
    );

    // To do: enable when CM is completely fixed.
    // fs::write(code_url, doc).unwrap();
}

/// Generate `src/unicode.rs`, a module holding the list of Unicode punctuation characters.
///
/// The Unicode data is read from the cache file `unicode-data.txt` when that
/// file can be read; otherwise it is downloaded and the cache file is written.
/// Each non-blank line is split on `;`: the first field is taken as the code
/// point and the third as the category. Code points whose category is one of
/// the `P*` values listed in `search` are emitted, in file order, as `char`
/// literals of a `PUNCTUATION` array.
///
/// # Panics
///
/// Panics if the download fails or answers with an HTTP error status, if a
/// non-blank line has fewer than three fields, or if the cache file or the
/// generated module cannot be written.
async fn punctuation() {
    let url = "https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt";
    let data_url = "unicode-data.txt";
    let code_url = "src/unicode.rs";

    let value = match fs::read_to_string(data_url) {
        Ok(value) => value,
        // Any read failure is treated as a cache miss.
        Err(_) => {
            let value = reqwest::get(url)
                .await
                // Reject 4xx/5xx answers so that an error page is never cached as data.
                .and_then(|response| response.error_for_status())
                .expect("failed to download the Unicode data")
                .text()
                .await
                .expect("failed to read the Unicode data response");

            // Borrow the text: there is no need to clone it just to write the cache.
            fs::write(data_url, &value).expect("failed to write the Unicode data cache");

            value
        }
    };

    let search = [
        "Pc", // Punctuation, Connector
        "Pd", // Punctuation, Dash
        "Pe", // Punctuation, Close
        "Pf", // Punctuation, FinalQuote
        "Pi", // Punctuation, InitialQuote
        "Po", // Punctuation, Other
        "Ps", // Punctuation, Open
    ];

    let found = value
        .lines()
        // Tolerate blank lines instead of failing on them.
        .filter(|line| !line.trim().is_empty())
        .map(|line| {
            let mut cells = line.split(';');
            // `split` always yields at least one item.
            let code_point = cells.next().unwrap_or("");
            // Skip the second field; the third one is the category.
            let category = cells
                .nth(1)
                .unwrap_or_else(|| panic!("malformed record in `{}`: {:?}", data_url, line));
            (code_point, category)
        })
        .filter(|(_, category)| search.contains(category))
        .map(|(code_point, _)| code_point)
        .collect::<Vec<_>>();

    let doc = format!(
        "//! Information on Unicode.

/// List of characters that are considered punctuation according to Unicode.
///
/// > 👉 **Important**: this module is generated by `build.rs`.
/// > It is generated from the latest Unicode data.
///
/// Rust does not contain an `is_punctuation` method on `char`, while it does
/// support [`is_ascii_punctuation`](char::is_ascii_punctuation).
///
/// `CommonMark` handles attention (emphasis, strong) markers based on what
/// comes before or after them.
/// One such difference is if those characters are Unicode punctuation.
///
/// ## References
///
/// *   [*§ 2.1 Characters and lines* in `CommonMark`](https://spec.commonmark.org/0.30/#unicode-punctuation-character)
pub const PUNCTUATION: [char; {}] = [
{}
];
",
        found.len(),
        found
            .iter()
            .map(|d| format!("    '\\u{{{}}}',", d))
            .collect::<Vec<_>>()
            .join("\n")
    );

    fs::write(code_url, doc).expect("failed to write the generated Unicode module");
}