From 182467c1d393dee2081ff80f1c049cb145f23123 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Mon, 20 Jun 2022 18:53:38 +0200
Subject: Add support for BOM

---
 src/subtokenize.rs |  4 ++++
 src/tokenizer.rs   | 10 ++++++++++
 2 files changed, 14 insertions(+)

(limited to 'src')

diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 4a29a01..0623a37 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -15,6 +15,10 @@ pub fn subtokenize(events: Vec, codes: &[Code]) -> (Vec, bool) {
     let mut link_to_info: HashMap = HashMap::new();
     let mut done = true;
 
+    if events.is_empty() {
+        return (events, true);
+    }
+
     while index < events.len() {
         let event = &events[index];
 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d31c8c5..c0a7105 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -680,10 +680,20 @@ fn attempt_impl(
 // To do: handle BOM at start?
 pub fn as_codes(value: &str) -> Vec {
     let mut codes: Vec = vec![];
+    let mut at_start = true;
     let mut at_carriage_return = false;
     let mut column = 1;
 
     for char in value.chars() {
+        if at_start {
+            if char == '\u{feff}' {
+                // Ignore.
+                continue;
+            }
+
+            at_start = false;
+        }
+
         // Send a CRLF.
         if at_carriage_return && '\n' == char {
             at_carriage_return = false;
-- 
cgit