diff options
Diffstat (limited to '')
-rw-r--r-- | src/subtokenize.rs | 4 | ||||
-rw-r--r-- | src/tokenizer.rs | 10 |
2 files changed, 14 insertions, 0 deletions
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 4a29a01..0623a37 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -15,6 +15,10 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
     let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new();
     let mut done = true;
 
+    if events.is_empty() {
+        return (events, true);
+    }
+
     while index < events.len() {
         let event = &events[index];
 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d31c8c5..c0a7105 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -680,10 +680,20 @@ fn attempt_impl(
 // To do: handle BOM at start?
 pub fn as_codes(value: &str) -> Vec<Code> {
     let mut codes: Vec<Code> = vec![];
+    let mut at_start = true;
     let mut at_carriage_return = false;
     let mut column = 1;
 
     for char in value.chars() {
+        if at_start {
+            if char == '\u{feff}' {
+                // Ignore.
+                continue;
+            }
+
+            at_start = false;
+        }
+
         // Send a CRLF.
         if at_carriage_return && '\n' == char {
             at_carriage_return = false;