aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/subtokenize.rs 4
-rw-r--r-- src/tokenizer.rs 10
2 files changed, 14 insertions, 0 deletions
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 4a29a01..0623a37 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -15,6 +15,10 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new();
let mut done = true;
+ if events.is_empty() {
+ return (events, true);
+ }
+
while index < events.len() {
let event = &events[index];
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d31c8c5..c0a7105 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -680,10 +680,20 @@ fn attempt_impl(
// To do: handle BOM at start?
pub fn as_codes(value: &str) -> Vec<Code> {
let mut codes: Vec<Code> = vec![];
+ let mut at_start = true;
let mut at_carriage_return = false;
let mut column = 1;
for char in value.chars() {
+ if at_start {
+ if char == '\u{feff}' {
+ // Ignore.
+ continue;
+ }
+
+ at_start = false;
+ }
+
// Send a CRLF.
if at_carriage_return && '\n' == char {
at_carriage_return = false;