From c98f902eeae2ac9952173385b146249f11364765 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 11 Jul 2022 11:46:47 +0200 Subject: Add tests for matching identifiers w/ turkish `i` --- readme.md | 3 +-- src/util/normalize_identifier.rs | 1 - tests/definition.rs | 25 +++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/readme.md b/readme.md index ad8aec0..2d87144 100644 --- a/readme.md +++ b/readme.md @@ -152,8 +152,6 @@ cargo doc --document-private-items - [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept, how to integrate with streams or so? - [ ] (1) Go through clippy rules, and such, to add strict code styles -- [ ] (1) Make sure that rust character groups match CM character groups (e.g., is - `unicode_whitespace` or so the same?) - [ ] (1) Any special handling of surrogates? - [ ] (1) Make sure debugging, assertions are useful for other folks - [ ] (3) Add some benchmarks (against comrak, pulldown-cmark, kramdown?), do some perf testing @@ -274,3 +272,4 @@ important. - [x] (1) Use `char::REPLACEMENT_CHARACTER`? - [x] (3) Add support for concrete constructs (html (flow) or code (fenced) cannot be “pierced” into by containers) +- [x] (1) Make sure that rust character groups match CM character groups diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs index 123a3a9..73f246d 100644 --- a/src/util/normalize_identifier.rs +++ b/src/util/normalize_identifier.rs @@ -54,7 +54,6 @@ pub fn normalize_identifier(value: &str) -> String { } } - // To do: test if this matches unicode. // Some characters are considered “uppercase”, but if their lowercase // counterpart is uppercased will result in a different uppercase // character. 
diff --git a/tests/definition.rs b/tests/definition.rs index 0686b6d..9bf4072 100644 --- a/tests/definition.rs +++ b/tests/definition.rs @@ -111,6 +111,31 @@ fn definition() { "should match w/ case-insensitive (2)" ); + assert_eq!( + micromark("[ı]: a\n\n[I]"), + "<p><a href=\"a\">I</a></p>", + "should match w/ undotted turkish i (1)" + ); + assert_eq!( + micromark("[I]: a\n\n[ı]"), + "<p><a href=\"a\">ı</a></p>", + "should match w/ undotted turkish i (2)" + ); + // Ref: + // GFM parses the same (last checked: 2022-07-11). + assert_eq!( + micromark("[i]: a\n\n[İ]"), + "<p>[İ]</p>", + "should *not* match w/ dotted turkish i (1)" + ); + // Ref: + // GFM parses the same (last checked: 2022-07-11). + assert_eq!( + micromark("[İ]: a\n\n[i]"), + "<p>[i]</p>", + "should *not* match w/ dotted turkish i (2)" + ); + assert_eq!( micromark("[foo]: /url"), "", -- cgit