diff options
| author | 2022-06-20 18:53:38 +0200 | |
|---|---|---|
| committer | 2022-06-20 18:53:38 +0200 | |
| commit | 182467c1d393dee2081ff80f1c049cb145f23123 (patch) | |
| tree | 9ce529c815faab2db1f96b0820a78049b7633a8a | |
| parent | ef5f9a97493fe4a616b49a744d5a571a99ead8e9 (diff) | |
| download | markdown-rs-182467c1d393dee2081ff80f1c049cb145f23123.tar.gz markdown-rs-182467c1d393dee2081ff80f1c049cb145f23123.tar.bz2 markdown-rs-182467c1d393dee2081ff80f1c049cb145f23123.zip | |
Add support for BOM
Diffstat (limited to '')
| -rw-r--r-- | readme.md | 2 | ||||
| -rw-r--r-- | src/subtokenize.rs | 4 | ||||
| -rw-r--r-- | src/tokenizer.rs | 10 | ||||
| -rw-r--r-- | tests/misc_bom.rs | 16 | ||||
| -rw-r--r-- | tests/misc_zero.rs | 2 | ||||
5 files changed, 24 insertions, 10 deletions
| @@ -66,7 +66,6 @@ cargo doc --document-private-items  ### Small things -- [ ] (1) Handle BOM at start  - [ ] (1) Parse initial and final whitespace of paragraphs (in text)  - [ ] (1) Add docs to subtokenize  - [ ] (1) Add module docs to parser @@ -171,6 +170,7 @@ cargo doc --document-private-items  - [x] (1) Parse whitespace in each flow construct  - [x] (1) Connect `ChunkString` in label, destination, title  - [x] (1) Add support for line endings in `string` +- [x] (1) Handle BOM at start  ### Extensions diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 4a29a01..0623a37 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -15,6 +15,10 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {      let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new();      let mut done = true; +    if events.is_empty() { +        return (events, true); +    } +      while index < events.len() {          let event = &events[index]; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d31c8c5..c0a7105 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -680,10 +680,20 @@ fn attempt_impl(  // To do: handle BOM at start?  pub fn as_codes(value: &str) -> Vec<Code> {      let mut codes: Vec<Code> = vec![]; +    let mut at_start = true;      let mut at_carriage_return = false;      let mut column = 1;      for char in value.chars() { +        if at_start { +            if char == '\u{feff}' { +                // Ignore. +                continue; +            } + +            at_start = false; +        } +          // Send a CRLF.          if at_carriage_return && '\n' == char {              at_carriage_return = false; diff --git a/tests/misc_bom.rs b/tests/misc_bom.rs index 9805616..44f661e 100644 --- a/tests/misc_bom.rs +++ b/tests/misc_bom.rs @@ -1,15 +1,13 @@  extern crate micromark; -// use micromark::micromark; +use micromark::micromark;  #[test]  fn bom() { -    // // To do: BOM. 
-    // assert_eq!(micromark("\u{FEFF}"), "", "should ignore just a bom"); +    assert_eq!(micromark("\u{FEFF}"), "", "should ignore just a bom"); -    // // To do: BOM. -    // assert_eq!( -    //     micromark("\u{FEFF}# hea\u{FEFF}ding"), -    //     "<h1>hea\u{FEFF}ding</h1>", -    //     "should ignore a bom" -    // ); +    assert_eq!( +        micromark("\u{FEFF}# hea\u{FEFF}ding"), +        "<h1>hea\u{FEFF}ding</h1>", +        "should ignore a bom" +    );  } diff --git a/tests/misc_zero.rs b/tests/misc_zero.rs index 946a3e2..47aa8ed 100644 --- a/tests/misc_zero.rs +++ b/tests/misc_zero.rs @@ -3,6 +3,8 @@ use micromark::micromark;  #[test]  fn zero() { +    assert_eq!(micromark(""), "", "should support no markdown"); +      assert_eq!(          micromark("asd\0asd"),          "<p>asd�asd</p>", | 
