diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-11 11:46:47 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-11 11:46:47 +0200 |
commit | c98f902eeae2ac9952173385b146249f11364765 (patch) | |
tree | 88179439f59608ccde7d32ded1b7dcf1602ccc5e | |
parent | 1fe3139713fb9e5f000af458154ddf634d6ebe64 (diff) | |
download | markdown-rs-c98f902eeae2ac9952173385b146249f11364765.tar.gz markdown-rs-c98f902eeae2ac9952173385b146249f11364765.tar.bz2 markdown-rs-c98f902eeae2ac9952173385b146249f11364765.zip |
Add tests for matching identifiers with Turkish `i`
Diffstat (limited to '')
-rw-r--r-- | readme.md | 3 | ||||
-rw-r--r-- | src/util/normalize_identifier.rs | 1 | ||||
-rw-r--r-- | tests/definition.rs | 25 |
3 files changed, 26 insertions, 3 deletions
@@ -152,8 +152,6 @@ cargo doc --document-private-items - [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept, how to integrate with streams or so? - [ ] (1) Go through clippy rules, and such, to add strict code styles -- [ ] (1) Make sure that rust character groups match CM character groups (e.g., is - `unicode_whitespace` or so the same?) - [ ] (1) Any special handling of surrogates? - [ ] (1) Make sure debugging, assertions are useful for other folks - [ ] (3) Add some benchmarks (against comrak, pulldown-cmark, kramdown?), do some perf testing @@ -274,3 +272,4 @@ important. - [x] (1) Use `char::REPLACEMENT_CHARACTER`? - [x] (3) Add support for concrete constructs (html (flow) or code (fenced) cannot be “pierced” into by containers) +- [x] (1) Make sure that rust character groups match CM character groups diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs index 123a3a9..73f246d 100644 --- a/src/util/normalize_identifier.rs +++ b/src/util/normalize_identifier.rs @@ -54,7 +54,6 @@ pub fn normalize_identifier(value: &str) -> String { } } - // To do: test if this matches unicode. // Some characters are considered “uppercase”, but if their lowercase // counterpart is uppercased will result in a different uppercase // character. diff --git a/tests/definition.rs b/tests/definition.rs index 0686b6d..9bf4072 100644 --- a/tests/definition.rs +++ b/tests/definition.rs @@ -112,6 +112,31 @@ fn definition() { ); assert_eq!( + micromark("[ı]: a\n\n[I]"), + "<p><a href=\"a\">I</a></p>", + "should match w/ undotted turkish i (1)" + ); + assert_eq!( + micromark("[I]: a\n\n[ı]"), + "<p><a href=\"a\">ı</a></p>", + "should match w/ undotted turkish i (2)" + ); + // Ref: <https://spec.commonmark.org/dingus/?text=%5Bi%5D%3A%20a%0A%0A%5Bİ%5D> + // GFM parses the same (last checked: 2022-07-11). 
+ assert_eq!( + micromark("[i]: a\n\n[İ]"), + "<p>[İ]</p>", + "should *not* match w/ dotted turkish i (1)" + ); + // Ref: <https://spec.commonmark.org/dingus/?text=%5Bİ%5D%3A%20a%0A%0A%5Bi%5D> + // GFM parses the same (last checked: 2022-07-11). + assert_eq!( + micromark("[İ]: a\n\n[i]"), + "<p>[i]</p>", + "should *not* match w/ dotted turkish i (2)" + ); + + assert_eq!( micromark("[foo]: /url"), "", "should not contribute anything w/o reference (1)" |