diff options
-rw-r--r-- | readme.md | 2 | ||||
-rw-r--r-- | src/subtokenize.rs | 4 | ||||
-rw-r--r-- | src/tokenizer.rs | 10 | ||||
-rw-r--r-- | tests/misc_bom.rs | 16 | ||||
-rw-r--r-- | tests/misc_zero.rs | 2 |
5 files changed, 24 insertions, 10 deletions
@@ -66,7 +66,6 @@ cargo doc --document-private-items ### Small things -- [ ] (1) Handle BOM at start - [ ] (1) Parse initial and final whitespace of paragraphs (in text) - [ ] (1) Add docs to subtokenize - [ ] (1) Add module docs to parser @@ -171,6 +170,7 @@ cargo doc --document-private-items - [x] (1) Parse whitespace in each flow construct - [x] (1) Connect `ChunkString` in label, destination, title - [x] (1) Add support for line endings in `string` +- [x] (1) Handle BOM at start ### Extensions diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 4a29a01..0623a37 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -15,6 +15,10 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) { let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new(); let mut done = true; + if events.is_empty() { + return (events, true); + } + while index < events.len() { let event = &events[index]; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d31c8c5..c0a7105 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -680,10 +680,20 @@ fn attempt_impl( // To do: handle BOM at start? pub fn as_codes(value: &str) -> Vec<Code> { let mut codes: Vec<Code> = vec![]; + let mut at_start = true; let mut at_carriage_return = false; let mut column = 1; for char in value.chars() { + if at_start { + if char == '\u{feff}' { + // Ignore. + continue; + } + + at_start = false; + } + // Send a CRLF. if at_carriage_return && '\n' == char { at_carriage_return = false; diff --git a/tests/misc_bom.rs b/tests/misc_bom.rs index 9805616..44f661e 100644 --- a/tests/misc_bom.rs +++ b/tests/misc_bom.rs @@ -1,15 +1,13 @@ extern crate micromark; -// use micromark::micromark; +use micromark::micromark; #[test] fn bom() { - // // To do: BOM. - // assert_eq!(micromark("\u{FEFF}"), "", "should ignore just a bom"); + assert_eq!(micromark("\u{FEFF}"), "", "should ignore just a bom"); - // // To do: BOM. - // assert_eq!( - // micromark("\u{FEFF}# hea\u{FEFF}ding"), - // "<h1>hea\u{FEFF}ding</h1>", - // "should ignore a bom" - // ); + assert_eq!( + micromark("\u{FEFF}# hea\u{FEFF}ding"), + "<h1>hea\u{FEFF}ding</h1>", + "should ignore a bom" + ); } diff --git a/tests/misc_zero.rs b/tests/misc_zero.rs index 946a3e2..47aa8ed 100644 --- a/tests/misc_zero.rs +++ b/tests/misc_zero.rs @@ -3,6 +3,8 @@ use micromark::micromark; #[test] fn zero() { + assert_eq!(micromark(""), "", "should support no markdown"); + assert_eq!( micromark("asd\0asd"), "<p>asd�asd</p>", |