about summary refs log tree commit diff stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- readme.md 3
-rw-r--r-- src/util/normalize_identifier.rs 1
-rw-r--r-- tests/definition.rs 25
3 files changed, 26 insertions, 3 deletions
diff --git a/readme.md b/readme.md
index ad8aec0..2d87144 100644
--- a/readme.md
+++ b/readme.md
@@ -152,8 +152,6 @@ cargo doc --document-private-items
- [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept,
how to integrate with streams or so?
- [ ] (1) Go through clippy rules, and such, to add strict code styles
-- [ ] (1) Make sure that rust character groups match CM character groups (e.g., is
- `unicode_whitespace` or so the same?)
- [ ] (1) Any special handling of surrogates?
- [ ] (1) Make sure debugging, assertions are useful for other folks
- [ ] (3) Add some benchmarks (against comrak, pulldown-cmark, kramdown?), do some perf testing
@@ -274,3 +272,4 @@ important.
- [x] (1) Use `char::REPLACEMENT_CHARACTER`?
- [x] (3) Add support for concrete constructs
(html (flow) or code (fenced) cannot be “pierced” into by containers)
+- [x] (1) Make sure that rust character groups match CM character groups
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index 123a3a9..73f246d 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -54,7 +54,6 @@ pub fn normalize_identifier(value: &str) -> String {
}
}
- // To do: test if this matches unicode.
// Some characters are considered “uppercase”, but uppercasing their
// lowercase counterpart results in a different uppercase
// character.
diff --git a/tests/definition.rs b/tests/definition.rs
index 0686b6d..9bf4072 100644
--- a/tests/definition.rs
+++ b/tests/definition.rs
@@ -112,6 +112,31 @@ fn definition() {
);
assert_eq!(
+ micromark("[ı]: a\n\n[I]"),
+ "<p><a href=\"a\">I</a></p>",
+ "should match w/ undotted turkish i (1)"
+ );
+ assert_eq!(
+ micromark("[I]: a\n\n[ı]"),
+ "<p><a href=\"a\">ı</a></p>",
+ "should match w/ undotted turkish i (2)"
+ );
+ // Ref: <https://spec.commonmark.org/dingus/?text=%5Bi%5D%3A%20a%0A%0A%5Bİ%5D>
+ // GFM parses the same (last checked: 2022-07-11).
+ assert_eq!(
+ micromark("[i]: a\n\n[İ]"),
+ "<p>[İ]</p>",
+ "should *not* match w/ dotted turkish i (1)"
+ );
+ // Ref: <https://spec.commonmark.org/dingus/?text=%5Bİ%5D%3A%20a%0A%0A%5Bi%5D>
+ // GFM parses the same (last checked: 2022-07-11).
+ assert_eq!(
+ micromark("[İ]: a\n\n[i]"),
+ "<p>[i]</p>",
+ "should *not* match w/ dotted turkish i (2)"
+ );
+
+ assert_eq!(
micromark("[foo]: /url"),
"",
"should not contribute anything w/o reference (1)"