diff options
Diffstat (limited to '')
-rw-r--r-- | src/construct/mod.rs | 1 | ||||
-rw-r--r-- | src/construct/partial_bom.rs | 12 | ||||
-rw-r--r-- | src/construct/partial_whitespace.rs | 16 |
3 files changed, 21 insertions, 8 deletions
diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 7b50957..cfaca0a 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -41,6 +41,7 @@ //! //! There are also several routines used in different places: //! +//! * [bom][partial_bom] //! * [data][partial_data] //! * [destination][partial_destination] //! * [label][partial_label] diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs index 155a1a3..d92c9c1 100644 --- a/src/construct/partial_bom.rs +++ b/src/construct/partial_bom.rs @@ -1,4 +1,14 @@ -//! To do. +//! Byte order mark occurs at the start of the document. +//! +//! It’s the three bytes 0xEF, 0xBB, and 0xBF. +//! +//! ## Tokens +//! +//! * [`ByteOrderMark`][Token::ByteOrderMark] +//! +//! ## References +//! +//! * [`micromark/lib/preprocess.js` in `micromark`](https://github.com/micromark/micromark/blob/ed23453/packages/micromark/dev/lib/preprocess.js#L54-L60) use crate::token::Token; use crate::tokenizer::{State, Tokenizer}; diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs index 4f872ba..bf3bd4d 100644 --- a/src/construct/partial_whitespace.rs +++ b/src/construct/partial_whitespace.rs @@ -47,15 +47,18 @@ use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN; use crate::token::Token; -use crate::tokenizer::{Event, EventType, Tokenizer}; +use crate::tokenizer::{Event, EventType, Resolver, Tokenizer}; use crate::util::slice::{Position, Slice}; -/// To do. -pub fn create_resolve_whitespace(hard_break: bool, trim_whole: bool) -> impl Fn(&mut Tokenizer) { - move |t| resolve_whitespace(t, hard_break, trim_whole) +/// Create a resolver to handle trailing whitespace in events. +/// +/// Performing this as a resolver instead of a tokenizer improves performance +/// *a lot*. +pub fn create_resolve_whitespace(hard_break: bool, trim_whole: bool) -> Box<Resolver> { + Box::new(move |t| resolve_whitespace(t, hard_break, trim_whole)) } -/// To do. +/// Resolve whitespace. pub fn resolve_whitespace(tokenizer: &mut Tokenizer, hard_break: bool, trim_whole: bool) { let mut index = 0; @@ -76,8 +79,7 @@ pub fn resolve_whitespace(tokenizer: &mut Tokenizer, hard_break: bool, trim_whol } } -/// To do. -#[allow(clippy::too_many_lines)] +/// Trim a [`Data`][Token::Data] token. fn trim_data( tokenizer: &mut Tokenizer, exit_index: usize, |