Refactor to proof docs, grammars

author: Titus Wormer <tituswormer@gmail.com> 2022-08-15 18:22:40 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-08-15 18:22:40 +0200
commit: 2379c2749916483be68dbf816a4c56cd59ced958 (patch)
tree: 5db8ea01782212b3f465d40f912ed87481012bbb /src/util
parent: 3aa45de9dc359169ccaabc07ffa986d72a010cd8 (diff)
download: markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.tar.gz
markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.tar.bz2
markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.zip
8 files changed, 39 insertions, 33 deletions
diff --git a/src/util/decode_character_reference.rs b/src/util/decode_character_reference.rs
index f8fd18f..8ed32f4 100644
--- a/src/util/decode_character_reference.rs
+++ b/src/util/decode_character_reference.rs
@@ -1,4 +1,4 @@
-//! Utilities to decode character references.
+//! Decode character references.
 
 use crate::constant::CHARACTER_REFERENCES;
 
@@ -43,11 +43,11 @@ pub fn decode_named(value: &str) -> String {
 /// Decode numeric character references.
 ///
 /// Turn the number (in string form as either hexadecimal or decimal) coming
-/// from a numeric character reference into a character.
-/// Whether the base of the string form is `10` (decimal) or `16` (hexadecimal)
-/// must be passed as the `radix` parameter.
+/// from a numeric character reference into a string.
+/// The base of the string form must be passed as the `radix` parameter, as
+/// `10` (decimal) or `16` (hexadecimal).
 ///
-/// This returns the `char` associated with that number or a replacement
+/// This returns a `String` form of the associated character or a replacement
 /// character for C0 control characters (except for ASCII whitespace), C1
 /// control characters, lone surrogates, noncharacters, and out of range
 /// characters.
diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs
index 11ac486..33c5706 100644
--- a/src/util/edit_map.rs
+++ b/src/util/edit_map.rs
@@ -1,6 +1,6 @@
-//! Helpers to deal with several changes in events, batching them together.
+//! Deal with several changes in events, batching them together.
 //!
-//! Preferably, changes should be kept to a minumum.
+//! Preferably, changes should be kept to a minimum.
 //! Sometimes, it’s needed to change the list of events, because parsing can be
 //! messy, and it helps to expose a cleaner interface of events to the compiler
 //! and other users.
diff --git a/src/util/encode.rs b/src/util/encode.rs
index d37a2de..6530011 100644
--- a/src/util/encode.rs
+++ b/src/util/encode.rs
@@ -1,10 +1,11 @@
-//! Utilities to encode HTML.
+//! Encode HTML.
 
 /// Encode dangerous html characters.
 ///
 /// This ensures that certain characters which have special meaning in HTML are
 /// dealt with.
-/// Technically, we can skip `>` and `"` in many cases, but CM includes them.
+/// Technically, we can skip `>` and `"` in many cases, but `CommonMark`
+/// includes them.
 ///
 /// This behavior is not explained in prose in `CommonMark` but can be inferred
 /// from the input/output test cases.
diff --git a/src/util/mod.rs b/src/util/mod.rs
index a01f31e..f51845c 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,4 +1,4 @@
-//! Utilities used when compiling markdown.
+//! Utilities used when processing markdown.
 
 pub mod decode_character_reference;
 pub mod edit_map;
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index f5b12d0..ddc51f8 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -1,17 +1,25 @@
-//! Utility to normalize identifiers.
+//! Normalize identifiers.
 
 /// Normalize an identifier, as found in [references][label_end] and
 /// [definitions][definition], so it can be compared when matching.
 ///
 /// This collapsed whitespace found in markdown (`\t`, `\r`, `\n`, and ` `)
-/// into one space, trims it (as in, dropping the first and last space),
-/// and then performs unicode case folding twice: first by uppercasing
-/// lowercase characters, and then lowercasing uppercase characters.
+/// into one space, trims it (as in, dropping the first and last space), and
+/// then performs unicode case folding twice: first by lowercasing uppercase
+/// characters, and then uppercasing lowercase characters.
 ///
 /// Some characters are considered “uppercase”, such as U+03F4 (`ϴ`), but if
 /// their lowercase counterpart (U+03B8 (`θ`)) is uppercased will result in a
 /// different uppercase character (U+0398 (`Θ`)).
-/// Hence, to get that form, we perform both upper- and lowercase.
+/// Hence, to get that form, we perform both lower- and uppercase.
+///
+/// Performing these steps in that order works, but the inverse does not work.
+/// To illustrate, say the source markdown contains two identifiers
+/// `SS` (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to
+/// `ss` (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both
+/// uppercase to `SS` (U+0053 U+0053).
+/// If we’d invert the steps, for `ẞ`, we’d first uppercase without a
+/// change, and then lowercase to `ß`, which would not match `ss`.
 ///
 /// ## Examples
 ///
@@ -64,17 +72,5 @@ pub fn normalize_identifier(value: &str) -> String {
         result.push_str(&value[start..]);
     }
 
-    // Some characters are considered “uppercase”, but if their lowercase
-    // counterpart is uppercased will result in a different uppercase
-    // character.
-    // Hence, to get that form, we perform both lower- and uppercase.
-    // Performing these steps in that order works, but the inverse does not
-    // work.
-    // To illustrate, say the source markdown containes two identifiers `SS`
-    // (U+0053 U+0053) and `ẞ` (U+1E9E), which would be lowercased to `ss`
-    // (U+0073 U+0073) and `ß` (U+00DF), and those in turn would both uppercase
-    // to `SS` (U+0053 U+0053).
-    // If we’d inverse the steps, for `ẞ`, we’d first uppercase without a
-    // change, and then lowercase to `ß`, which would not match `ss`.
     result.to_lowercase().to_uppercase()
 }
diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs
index 051e1e1..593a70e 100644
--- a/src/util/sanitize_uri.rs
+++ b/src/util/sanitize_uri.rs
@@ -1,4 +1,4 @@
-//! Utilities to make urls safe.
+//! Make urls safe.
 
 use crate::util::encode::encode;
 
@@ -60,9 +60,10 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
     value
 }
 
-/// Normalize a URL (such as used in definitions).
+/// Normalize a URL (such as used in [definitions][definition] and
+/// [references][label_end]).
 ///
-/// Encode unsafe characters with percent-encoding, skipping already encoded
+/// It encodes unsafe characters with percent-encoding, skipping already encoded
 /// sequences.
 ///
 /// ## Examples
@@ -77,6 +78,9 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
 /// ## References
 ///
 /// *   [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
+///
+/// [definition]: crate::construct::definition
+/// [label_end]: crate::construct::label_end
 fn normalize_uri(value: &str) -> String {
     let chars = value.chars().collect::<Vec<_>>();
     // Note: it’ll grow bigger for each non-ascii or non-safe character.
diff --git a/src/util/skip.rs b/src/util/skip.rs
index 46cbb4a..a7de408 100644
--- a/src/util/skip.rs
+++ b/src/util/skip.rs
@@ -1,4 +1,4 @@
-//! Utilities to deal with lists of events.
+//! Move across lists of events.
 
 use crate::event::{Event, Kind, Name};
 
diff --git a/src/util/slice.rs b/src/util/slice.rs
index e70078a..be2a381 100644
--- a/src/util/slice.rs
+++ b/src/util/slice.rs
@@ -1,4 +1,4 @@
-//! Utilities to deal with characters.
+//! Deal with bytes.
 
 use crate::constant::TAB_SIZE;
 use crate::event::{Event, Kind, Point};
@@ -7,7 +7,9 @@ use std::str;
 /// A range between two points.
 #[derive(Debug)]
 pub struct Position<'a> {
+    /// Start point.
     pub start: &'a Point,
+    /// End point.
     pub end: &'a Point,
 }
 
@@ -55,11 +57,14 @@ impl<'a> Position<'a> {
 
 /// Bytes belonging to a range.
 ///
-/// Includes information on virtual spaces before and after the bytes.
+/// Includes info on virtual spaces before and after the bytes.
 #[derive(Debug)]
 pub struct Slice<'a> {
+    /// Bytes.
     pub bytes: &'a [u8],
+    /// Number of virtual spaces before the bytes.
     pub before: usize,
+    /// Number of virtual spaces after the bytes.
     pub after: usize,
 }
author	Titus Wormer <tituswormer@gmail.com>	2022-08-15 18:22:40 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-08-15 18:22:40 +0200
commit	2379c2749916483be68dbf816a4c56cd59ced958 (patch)
tree	5db8ea01782212b3f465d40f912ed87481012bbb /src/util
parent	3aa45de9dc359169ccaabc07ffa986d72a010cd8 (diff)
download	markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.tar.gz markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.tar.bz2 markdown-rs-2379c2749916483be68dbf816a4c56cd59ced958.zip