Diffstat
30 files changed, 450 insertions, 386 deletions
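The change below swaps the tokenizer's `char`-based input model for bytes (`u8`): each marker-kind enum gains `as_byte`/`from_byte` in place of `as_char`/`from_char`, literal matches become byte literals (`b'*'`, `b'\n'`), and slices are taken over `parse_state.bytes` instead of `parse_state.chars`. What follows is a condensed, self-contained sketch of that recurring pattern, lifted from the attention.rs hunk below — in the real code these methods sit inside the `Tokenizer` state machine, which is not reproduced here.

// Sketch of the kind/byte conversion pattern this diff applies throughout.
#[derive(Clone, Copy, Debug, PartialEq)]
enum MarkerKind {
    Asterisk,
    Underscore,
}

impl MarkerKind {
    /// Turn the kind into a byte ([u8]).
    fn as_byte(self) -> u8 {
        match self {
            MarkerKind::Asterisk => b'*',
            MarkerKind::Underscore => b'_',
        }
    }

    /// Turn a byte ([u8]) into a kind.
    ///
    /// Panics if `byte` is not `*` or `_`.
    fn from_byte(byte: u8) -> MarkerKind {
        match byte {
            b'*' => MarkerKind::Asterisk,
            b'_' => MarkerKind::Underscore,
            _ => unreachable!("invalid byte"),
        }
    }
}

fn main() {
    // Byte literals compare directly against raw input bytes,
    // so no char decoding is needed on the hot path.
    assert_eq!(MarkerKind::from_byte(b'*').as_byte(), b'*');
    assert_eq!(MarkerKind::from_byte(b'_'), MarkerKind::Underscore);
}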
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 65c2f6f..b042645 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -110,23 +110,23 @@ enum MarkerKind {
 }
 
 impl MarkerKind {
-    /// Turn the kind into a [char].
-    fn as_char(&self) -> char {
+    /// Turn the kind into a byte ([u8]).
+    fn as_byte(&self) -> u8 {
         match self {
-            MarkerKind::Asterisk => '*',
-            MarkerKind::Underscore => '_',
+            MarkerKind::Asterisk => b'*',
+            MarkerKind::Underscore => b'_',
         }
     }
 
-    /// Turn [char] into a kind.
+    /// Turn a byte ([u8]) into a kind.
     ///
     /// ## Panics
     ///
-    /// Panics if `char` is not `*` or `_`.
-    fn from_char(char: char) -> MarkerKind {
-        match char {
-            '*' => MarkerKind::Asterisk,
-            '_' => MarkerKind::Underscore,
-            _ => unreachable!("invalid char"),
+    /// Panics if `byte` is not `*` or `_`.
+    fn from_byte(byte: u8) -> MarkerKind {
+        match byte {
+            b'*' => MarkerKind::Asterisk,
+            b'_' => MarkerKind::Underscore,
+            _ => unreachable!("invalid byte"),
         }
     }
 }
@@ -160,9 +160,9 @@ struct Sequence {
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(char) if tokenizer.parse_state.constructs.attention && matches!(char, '*' | '_') => {
+        Some(byte) if tokenizer.parse_state.constructs.attention && matches!(byte, b'*' | b'_') => {
             tokenizer.enter(Token::AttentionSequence);
-            inside(tokenizer, MarkerKind::from_char(char))
+            inside(tokenizer, MarkerKind::from_byte(byte))
         }
         _ => State::Nok,
     }
@@ -175,7 +175,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///     ^^
 /// ```
 fn inside(tokenizer: &mut Tokenizer, marker: MarkerKind) -> State {
-    if tokenizer.current == Some(marker.as_char()) {
+    if tokenizer.current == Some(marker.as_byte()) {
         tokenizer.consume();
         State::Fn(Box::new(move |t| inside(t, marker)))
     } else {
@@ -188,7 +188,6 @@ fn inside(tokenizer: &mut Tokenizer, marker: MarkerKind) -> State {
 /// Resolve attention sequences.
 #[allow(clippy::too_many_lines)]
 fn resolve_attention(tokenizer: &mut Tokenizer) {
-    let chars = &tokenizer.parse_state.chars;
     let mut start = 0;
     let mut balance = 0;
     let mut sequences = vec![];
@@ -203,21 +202,34 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
             if enter.token_type == Token::AttentionSequence {
                 let end = start + 1;
                 let exit = &tokenizer.events[end];
-                let marker =
-                    MarkerKind::from_char(Slice::from_point(chars, &enter.point).head().unwrap());
+
+                let before_end = enter.point.index;
+                let before_start = if before_end < 4 { 0 } else { before_end - 4 };
+                let string_before =
+                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]);
+                let char_before = string_before.chars().last();
+
+                let after_start = exit.point.index;
+                let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
+                    tokenizer.parse_state.bytes.len()
+                } else {
+                    after_start + 4
+                };
+                let string_after =
+                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]);
+                let char_after = string_after.chars().next();
+
+                let marker = MarkerKind::from_byte(
+                    Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
+                        .head()
+                        .unwrap(),
+                );
                 let before = classify_character(if enter.point.index > 0 {
-                    Slice::from_point(
-                        chars,
-                        &Point {
-                            index: enter.point.index - 1,
-                            ..enter.point
-                        },
-                    )
-                    .tail()
+                    char_before
                 } else {
                     None
                 });
-                let after = classify_character(Slice::from_point(chars, &exit.point).tail());
+                let after = classify_character(char_after);
                 let open = after == GroupKind::Other
                     || (after == GroupKind::Punctuation && before != GroupKind::Other);
                 // To do: GFM strikethrough?
@@ -490,7 +502,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
 /// *   [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
 fn classify_character(char: Option<char>) -> GroupKind {
     match char {
-        // Custom characters.
+        // EOF.
         None => GroupKind::Whitespace,
         // Unicode whitespace.
         Some(char) if char.is_whitespace() => GroupKind::Whitespace,
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 399570b..b843af8 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -115,7 +115,7 @@ use crate::tokenizer::{State, Tokenizer};
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('<') if tokenizer.parse_state.constructs.autolink => {
+        Some(b'<') if tokenizer.parse_state.constructs.autolink => {
             tokenizer.enter(Token::Autolink);
             tokenizer.enter(Token::AutolinkMarker);
             tokenizer.consume();
@@ -137,16 +137,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(char) if char.is_ascii_alphabetic() => {
+        Some(byte) if byte.is_ascii_alphabetic() => {
             tokenizer.consume();
             State::Fn(Box::new(scheme_or_email_atext))
         }
-        Some(char) if is_ascii_atext(char) => email_atext(tokenizer),
+        Some(byte) if is_ascii_atext(byte) => email_atext(tokenizer),
         _ => State::Nok,
     }
 }
 
-/// After the first character of the protocol or email name.
+/// After the first byte of the protocol or email name.
 ///
 /// ```markdown
 /// > | a<https://example.com>b
@@ -156,7 +156,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+        Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
             scheme_inside_or_email_atext(tokenizer, 1)
         }
         _ => email_atext(tokenizer),
@@ -173,11 +173,11 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State {
     match tokenizer.current {
-        Some(':') => {
+        Some(b':') => {
             tokenizer.consume();
             State::Fn(Box::new(url_inside))
         }
-        Some('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z')
+        Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
             if size < AUTOLINK_SCHEME_SIZE_MAX =>
         {
             tokenizer.consume();
@@ -195,12 +195,12 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State
 /// ```
 fn url_inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('>') => {
+        Some(b'>') => {
             tokenizer.exit(Token::AutolinkProtocol);
             end(tokenizer)
         }
-        Some(char) if char.is_ascii_control() => State::Nok,
-        None | Some(' ') => State::Nok,
+        Some(byte) if byte.is_ascii_control() => State::Nok,
+        None | Some(b' ') => State::Nok,
         Some(_) => {
             tokenizer.consume();
             State::Fn(Box::new(url_inside))
@@ -216,11 +216,11 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn email_atext(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('@') => {
+        Some(b'@') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
         }
-        Some(char) if is_ascii_atext(char) => {
+        Some(byte) if is_ascii_atext(byte) => {
             tokenizer.consume();
             State::Fn(Box::new(email_atext))
         }
@@ -236,7 +236,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
     match tokenizer.current {
-        Some(char) if char.is_ascii_alphanumeric() => email_value(tokenizer, size),
+        Some(byte) if byte.is_ascii_alphanumeric() => email_value(tokenizer, size),
         _ => State::Nok,
     }
 }
@@ -249,11 +249,11 @@ fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
 /// ```
 fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
     match tokenizer.current {
-        Some('.') => {
+        Some(b'.') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
         }
-        Some('>') => {
+        Some(b'>') => {
             let index = tokenizer.events.len();
             tokenizer.exit(Token::AutolinkProtocol);
             // Change the token type.
@@ -275,11 +275,11 @@ fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
 /// ```
 fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
     match tokenizer.current {
-        Some('-') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
+        Some(b'-') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
             tokenizer.consume();
             State::Fn(Box::new(move |t| email_value(t, size + 1)))
         }
-        Some(char) if char.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => {
+        Some(byte) if byte.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => {
             tokenizer.consume();
             State::Fn(Box::new(move |t| email_label(t, size + 1)))
         }
@@ -297,7 +297,7 @@ fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
 /// ```
 fn end(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('>') => {
+        Some(b'>') => {
             tokenizer.enter(Token::AutolinkMarker);
             tokenizer.consume();
             tokenizer.exit(Token::AutolinkMarker);
@@ -324,6 +324,6 @@ fn end(tokenizer: &mut Tokenizer) -> State {
 /// IETF.
 ///
 /// [`is_ascii_alphanumeric`]: char::is_ascii_alphanumeric
-fn is_ascii_atext(x: char) -> bool {
-    matches!(x, '#'..='\'' | '*' | '+' | '-'..='9' | '=' | '?' | 'A'..='Z' | '^'..='~')
+fn is_ascii_atext(byte: u8) -> bool {
+    matches!(byte, b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~')
 }
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 6780f40..f397a48 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('\n') => State::Ok,
+        None | Some(b'\n') => State::Ok,
         _ => State::Nok,
     }
 }
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 49a0ea0..7e4753d 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -65,7 +65,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('>') => {
+        Some(b'>') => {
             tokenizer.enter(Token::BlockQuote);
             cont_before(tokenizer)
         }
@@ -98,7 +98,7 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn cont_before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('>') => {
+        Some(b'>') => {
             tokenizer.enter(Token::BlockQuotePrefix);
             tokenizer.enter(Token::BlockQuoteMarker);
             tokenizer.consume();
@@ -118,7 +118,7 @@ fn cont_before(tokenizer: &mut Tokenizer) -> State {
 ///      ^
 /// ```
 fn cont_after(tokenizer: &mut Tokenizer) -> State {
-    if let Some('\t' | ' ') = tokenizer.current {
+    if let Some(b'\t' | b' ') = tokenizer.current {
         tokenizer.enter(Token::SpaceOrTab);
         tokenizer.consume();
         tokenizer.exit(Token::SpaceOrTab);
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index e9263af..02e8b62 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -44,7 +44,7 @@ use crate::tokenizer::{State, Tokenizer};
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('\\') if tokenizer.parse_state.constructs.character_escape => {
+        Some(b'\\') if tokenizer.parse_state.constructs.character_escape => {
             tokenizer.enter(Token::CharacterEscape);
             tokenizer.enter(Token::CharacterEscapeMarker);
             tokenizer.consume();
@@ -63,7 +63,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(char) if char.is_ascii_punctuation() => {
+        Some(byte) if byte.is_ascii_punctuation() => {
             tokenizer.enter(Token::CharacterEscapeValue);
             tokenizer.consume();
             tokenizer.exit(Token::CharacterEscapeValue);
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 59043d1..90763c1 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -106,15 +106,15 @@ impl Kind {
         }
     }
 
-    /// Check if a char is allowed.
-    fn allowed(&self, char: char) -> bool {
+    /// Check if a byte ([`u8`]) is allowed.
+    fn allowed(&self, byte: u8) -> bool {
         let check = match self {
-            Kind::Hexadecimal => char::is_ascii_hexdigit,
-            Kind::Decimal => char::is_ascii_digit,
-            Kind::Named => char::is_ascii_alphanumeric,
+            Kind::Hexadecimal => u8::is_ascii_hexdigit,
+            Kind::Decimal => u8::is_ascii_digit,
+            Kind::Named => u8::is_ascii_alphanumeric,
         };
 
-        check(&char)
+        check(&byte)
     }
 }
@@ -141,7 +141,7 @@ struct Info {
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('&') if tokenizer.parse_state.constructs.character_reference => {
+        Some(b'&') if tokenizer.parse_state.constructs.character_reference => {
             tokenizer.enter(Token::CharacterReference);
             tokenizer.enter(Token::CharacterReferenceMarker);
             tokenizer.consume();
@@ -164,7 +164,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///       ^
 /// ```
 fn open(tokenizer: &mut Tokenizer) -> State {
-    if let Some('#') = tokenizer.current {
+    if let Some(b'#') = tokenizer.current {
         tokenizer.enter(Token::CharacterReferenceMarkerNumeric);
         tokenizer.consume();
         tokenizer.exit(Token::CharacterReferenceMarkerNumeric);
@@ -192,7 +192,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
 ///        ^
 /// ```
 fn numeric(tokenizer: &mut Tokenizer) -> State {
-    if let Some('x' | 'X') = tokenizer.current {
+    if let Some(b'x' | b'X') = tokenizer.current {
         tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal);
         tokenizer.consume();
         tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal);
@@ -229,10 +229,11 @@ fn numeric(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn value(tokenizer: &mut Tokenizer, mut info: Info) -> State {
     match tokenizer.current {
-        Some(';') if info.size > 0 => {
+        Some(b';') if info.size > 0 => {
             if Kind::Named == info.kind {
+                // To do: fix slice.
                 let value = Slice::from_position(
-                    &tokenizer.parse_state.chars,
+                    tokenizer.parse_state.bytes,
                     &Position {
                         start: &info.start,
                         end: &tokenizer.point,
@@ -252,8 +253,8 @@ fn value(tokenizer: &mut Tokenizer, mut info: Info) -> State {
             tokenizer.exit(Token::CharacterReference);
             State::Ok
         }
-        Some(char) => {
-            if info.size < info.kind.max() && info.kind.allowed(char) {
+        Some(byte) => {
+            if info.size < info.kind.max() && info.kind.allowed(byte) {
                 info.size += 1;
                 tokenizer.consume();
                 State::Fn(Box::new(|t| value(t, info)))
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 98fa54f..21e9259 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -136,23 +136,23 @@ pub enum Kind {
 }
 
 impl Kind {
-    /// Turn the kind into a [char].
-    fn as_char(&self) -> char {
+    /// Turn the kind into a byte ([u8]).
+    fn as_byte(&self) -> u8 {
         match self {
-            Kind::GraveAccent => '`',
-            Kind::Tilde => '~',
+            Kind::GraveAccent => b'`',
+            Kind::Tilde => b'~',
         }
     }
 
-    /// Turn a [char] into a kind.
+    /// Turn a byte ([u8]) into a kind.
     ///
     /// ## Panics
     ///
-    /// Panics if `char` is not `~` or `` ` ``.
-    fn from_char(char: char) -> Kind {
-        match char {
-            '`' => Kind::GraveAccent,
-            '~' => Kind::Tilde,
-            _ => unreachable!("invalid char"),
+    /// Panics if `byte` is not `~` or `` ` ``.
+    fn from_byte(byte: u8) -> Kind {
+        match byte {
+            b'`' => Kind::GraveAccent,
+            b'~' => Kind::Tilde,
+            _ => unreachable!("invalid byte"),
         }
     }
 }
@@ -207,7 +207,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
     if let Some(event) = tail {
         if event.token_type == Token::SpaceOrTab {
             prefix = Slice::from_position(
-                &tokenizer.parse_state.chars,
+                tokenizer.parse_state.bytes,
                 &Position::from_exit_event(&tokenizer.events, tokenizer.events.len() - 1),
             )
             .size();
@@ -215,14 +215,14 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
     }
 
     match tokenizer.current {
-        Some(char) if matches!(char, '`' | '~') => {
+        Some(byte) if matches!(byte, b'`' | b'~') => {
             tokenizer.enter(Token::CodeFencedFenceSequence);
             sequence_open(
                 tokenizer,
                 Info {
                     prefix,
                     size: 0,
-                    kind: Kind::from_char(char),
+                    kind: Kind::from_byte(byte),
                 },
             )
         }
@@ -240,7 +240,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn sequence_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
     match tokenizer.current {
-        Some(char) if char == info.kind.as_char() => {
+        Some(byte) if byte == info.kind.as_byte() => {
             tokenizer.consume();
             State::Fn(Box::new(|t| {
                 info.size += 1;
@@ -265,7 +265,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
 /// ```
 fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFencedFence);
             // Do not form containers.
             tokenizer.concrete = true;
@@ -289,7 +289,7 @@ fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::Data);
             tokenizer.exit(Token::CodeFencedFenceInfo);
             tokenizer.exit(Token::CodeFencedFence);
@@ -297,12 +297,12 @@ fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
             tokenizer.concrete = true;
             at_break(tokenizer, info)
         }
-        Some('\t' | ' ') => {
+        Some(b'\t' | b' ') => {
             tokenizer.exit(Token::Data);
             tokenizer.exit(Token::CodeFencedFenceInfo);
             tokenizer.attempt_opt(space_or_tab(), |t| meta_before(t, info))(tokenizer)
         }
-        Some('`') if info.kind == Kind::GraveAccent => State::Nok,
+        Some(b'`') if info.kind == Kind::GraveAccent => State::Nok,
         Some(_) => {
             tokenizer.consume();
             State::Fn(Box::new(|t| info_inside(t, info)))
@@ -320,7 +320,7 @@ fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn meta_before(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFencedFence);
             // Do not form containers.
             tokenizer.concrete = true;
@@ -344,7 +344,7 @@ fn meta_before(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::Data);
             tokenizer.exit(Token::CodeFencedFenceMeta);
             tokenizer.exit(Token::CodeFencedFence);
@@ -352,7 +352,7 @@ fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
             tokenizer.concrete = true;
             at_break(tokenizer, info)
         }
-        Some('`') if info.kind == Kind::GraveAccent => State::Nok,
+        Some(b'`') if info.kind == Kind::GraveAccent => State::Nok,
         _ => {
             tokenizer.consume();
             State::Fn(Box::new(|t| meta(t, info)))
@@ -413,7 +413,7 @@ fn at_non_lazy_break(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn close_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('\n') => {
+        Some(b'\n') => {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume();
             tokenizer.exit(Token::LineEnding);
@@ -452,7 +452,7 @@ fn close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn close_before(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some(char) if char == info.kind.as_char() => {
+        Some(byte) if byte == info.kind.as_byte() => {
             tokenizer.enter(Token::CodeFencedFenceSequence);
             close_sequence(tokenizer, info, 0)
         }
@@ -470,7 +470,7 @@ fn close_before(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn close_sequence(tokenizer: &mut Tokenizer, info: Info, size: usize) -> State {
     match tokenizer.current {
-        Some(char) if char == info.kind.as_char() => {
+        Some(byte) if byte == info.kind.as_byte() => {
             tokenizer.consume();
             State::Fn(Box::new(move |t| close_sequence(t, info, size + 1)))
         }
@@ -492,7 +492,7 @@ fn close_sequence(tokenizer: &mut Tokenizer, info: Info, size: usize) -> State {
 /// ```
 fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFencedFence);
             State::Ok
         }
@@ -538,7 +538,7 @@ fn content_start(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn content_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => at_break(tokenizer, info),
+        None | Some(b'\n') => at_break(tokenizer, info),
         _ => {
             tokenizer.enter(Token::CodeFlowChunk);
             content_continue(tokenizer, info)
@@ -556,7 +556,7 @@ fn content_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn content_continue(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFlowChunk);
             at_break(tokenizer, info)
         }
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index bb1615c..4a3a9f6 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -79,7 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 fn at_break(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => after(tokenizer),
-        Some('\n') => tokenizer.attempt(further_start, |ok| {
+        Some(b'\n') => tokenizer.attempt(further_start, |ok| {
             Box::new(if ok { at_break } else { after })
         })(tokenizer),
         _ => {
@@ -97,7 +97,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn content(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::CodeFlowChunk);
             at_break(tokenizer)
         }
@@ -133,7 +133,7 @@ fn further_start(tokenizer: &mut Tokenizer) -> State {
         State::Nok
     } else {
        match tokenizer.current {
-            Some('\n') => {
+            Some(b'\n') => {
                 tokenizer.enter(Token::LineEnding);
                 tokenizer.consume();
                 tokenizer.exit(Token::LineEnding);
@@ -177,7 +177,7 @@ fn further_begin(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn further_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('\n') => further_start(tokenizer),
+        Some(b'\n') => further_start(tokenizer),
         _ => State::Nok,
     }
 }
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 150f63b..b36a208 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -98,9 +98,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
     let len = tokenizer.events.len();
 
     match tokenizer.current {
-        Some('`')
+        Some(b'`')
             if tokenizer.parse_state.constructs.code_text
-                && (tokenizer.previous != Some('`')
+                && (tokenizer.previous != Some(b'`')
                     || (len > 0
                         && tokenizer.events[len - 1].token_type == Token::CharacterEscape)) =>
         {
@@ -119,7 +119,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
-    if let Some('`') = tokenizer.current {
+    if let Some(b'`') = tokenizer.current {
         tokenizer.consume();
         State::Fn(Box::new(move |t| sequence_open(t, size + 1)))
     } else {
@@ -137,13 +137,13 @@ fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
 fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State {
     match tokenizer.current {
         None => State::Nok,
-        Some('\n') => {
+        Some(b'\n') => {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume();
             tokenizer.exit(Token::LineEnding);
             State::Fn(Box::new(move |t| between(t, size_open)))
         }
-        Some('`') => {
+        Some(b'`') => {
             tokenizer.enter(Token::CodeTextSequence);
             sequence_close(tokenizer, size_open, 0)
         }
@@ -162,7 +162,7 @@ fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State {
 /// ```
 fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State {
     match tokenizer.current {
-        None | Some('\n' | '`') => {
+        None | Some(b'\n' | b'`') => {
             tokenizer.exit(Token::CodeTextData);
             between(tokenizer, size_open)
         }
@@ -181,7 +181,7 @@ fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State {
 /// ```
 fn sequence_close(tokenizer: &mut Tokenizer, size_open: usize, size: usize) -> State {
     match tokenizer.current {
-        Some('`') => {
+        Some(b'`') => {
             tokenizer.consume();
             State::Fn(Box::new(move |t| sequence_close(t, size_open, size + 1)))
         }
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index f2b5ae0..14755c9 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -137,7 +137,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('[') => tokenizer.go(
+        Some(b'[') => tokenizer.go(
             |t| {
                 label(
                     t,
@@ -162,7 +162,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn label_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(':') => {
+        Some(b':') => {
             tokenizer.enter(Token::DefinitionMarker);
             tokenizer.consume();
             tokenizer.exit(Token::DefinitionMarker);
@@ -231,7 +231,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::Definition);
             // You’d be interrupting.
             tokenizer.interrupt = true;
@@ -294,7 +294,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn title_after_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('\n') => State::Ok,
+        None | Some(b'\n') => State::Ok,
         _ => State::Nok,
     }
 }
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 0585c4c..cdbc192 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -51,7 +51,7 @@ use crate::tokenizer::{State, Tokenizer};
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('\\') if tokenizer.parse_state.constructs.hard_break_escape => {
+        Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => {
             tokenizer.enter(Token::HardBreakEscape);
             tokenizer.consume();
             State::Fn(Box::new(inside))
@@ -69,7 +69,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('\n') => {
+        Some(b'\n') => {
             tokenizer.exit(Token::HardBreakEscape);
             State::Ok
         }
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 7a7cf2e..9a73b77 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -87,7 +87,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 fn before(tokenizer: &mut Tokenizer) -> State {
-    if Some('#') == tokenizer.current {
+    if Some(b'#') == tokenizer.current {
         tokenizer.enter(Token::HeadingAtxSequence);
         sequence_open(tokenizer, 0)
     } else {
@@ -103,11 +103,11 @@ fn before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn sequence_open(tokenizer: &mut Tokenizer, rank: usize) -> State {
     match tokenizer.current {
-        None | Some('\n') if rank > 0 => {
+        None | Some(b'\n') if rank > 0 => {
             tokenizer.exit(Token::HeadingAtxSequence);
             at_break(tokenizer)
         }
-        Some('#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
+        Some(b'#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
             tokenizer.consume();
             State::Fn(Box::new(move |tokenizer| {
                 sequence_open(tokenizer, rank + 1)
@@ -129,15 +129,15 @@ fn sequence_open(tokenizer: &mut Tokenizer, rank: usize) -> State {
 /// ```
 fn at_break(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::HeadingAtx);
             tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
             // Feel free to interrupt.
             tokenizer.interrupt = false;
             State::Ok
         }
-        Some('\t' | ' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer),
-        Some('#') => {
+        Some(b'\t' | b' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer),
+        Some(b'#') => {
             tokenizer.enter(Token::HeadingAtxSequence);
             further_sequence(tokenizer)
         }
@@ -157,7 +157,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
 ///           ^
 /// ```
 fn further_sequence(tokenizer: &mut Tokenizer) -> State {
-    if let Some('#') = tokenizer.current {
+    if let Some(b'#') = tokenizer.current {
         tokenizer.consume();
         State::Fn(Box::new(further_sequence))
     } else {
@@ -175,7 +175,7 @@ fn further_sequence(tokenizer: &mut Tokenizer) -> State {
 fn data(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
-        None | Some('\t' | '\n' | ' ') => {
+        None | Some(b'\t' | b'\n' | b' ') => {
             tokenizer.exit(Token::Data);
             at_break(tokenizer)
         }
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index f9dd3f7..2a4adbf 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -88,23 +88,23 @@ pub enum Kind {
 }
 
 impl Kind {
-    /// Turn the kind into a [char].
-    fn as_char(&self) -> char {
+    /// Turn the kind into a byte ([u8]).
+    fn as_byte(&self) -> u8 {
         match self {
-            Kind::Dash => '-',
-            Kind::EqualsTo => '=',
+            Kind::Dash => b'-',
+            Kind::EqualsTo => b'=',
         }
     }
 
-    /// Turn a [char] into a kind.
+    /// Turn a byte ([u8]) into a kind.
     ///
     /// ## Panics
     ///
-    /// Panics if `char` is not `-` or `=`.
-    fn from_char(char: char) -> Kind {
-        match char {
-            '-' => Kind::Dash,
-            '=' => Kind::EqualsTo,
-            _ => unreachable!("invalid char"),
+    /// Panics if `byte` is not `-` or `=`.
+    fn from_byte(byte: u8) -> Kind {
+        match byte {
+            b'-' => Kind::Dash,
+            b'=' => Kind::EqualsTo,
+            _ => unreachable!("invalid byte"),
         }
     }
 }
@@ -148,9 +148,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(char) if matches!(char, '-' | '=') => {
+        Some(byte) if matches!(byte, b'-' | b'=') => {
             tokenizer.enter(Token::HeadingSetextUnderline);
-            inside(tokenizer, Kind::from_char(char))
+            inside(tokenizer, Kind::from_byte(byte))
         }
         _ => State::Nok,
     }
@@ -165,7 +165,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn inside(tokenizer: &mut Tokenizer, kind: Kind) -> State {
     match tokenizer.current {
-        Some(char) if char == kind.as_char() => {
+        Some(byte) if byte == kind.as_byte() => {
             tokenizer.consume();
             State::Fn(Box::new(move |t| inside(t, kind)))
         }
@@ -185,7 +185,7 @@ fn inside(tokenizer: &mut Tokenizer, kind: Kind) -> State {
 /// ```
 fn after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             // Feel free to interrupt.
             tokenizer.interrupt = false;
             tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve));
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index e2b66e5..5860c5d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -108,7 +108,7 @@ use crate::token::Token;
 use crate::tokenizer::{Point, State, Tokenizer};
 use crate::util::slice::{Position, Slice};
 
-const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
+const CDATA_SEARCH: [u8; 6] = [b'C', b'D', b'A', b'T', b'A', b'['];
 
 /// Kind of HTML (flow).
 #[derive(Debug, PartialEq)]
@@ -151,23 +151,23 @@ enum QuoteKind {
 }
 
 impl QuoteKind {
-    /// Turn the kind into a [char].
-    fn as_char(&self) -> char {
+    /// Turn the kind into a byte ([u8]).
+    fn as_byte(&self) -> u8 {
         match self {
-            QuoteKind::Double => '"',
-            QuoteKind::Single => '\'',
+            QuoteKind::Double => b'"',
+            QuoteKind::Single => b'\'',
         }
     }
 
-    /// Turn a [char] into a kind.
+    /// Turn a byte ([u8]) into a kind.
     ///
     /// ## Panics
     ///
-    /// Panics if `char` is not `"` or `'`.
-    fn from_char(char: char) -> QuoteKind {
-        match char {
-            '"' => QuoteKind::Double,
-            '\'' => QuoteKind::Single,
-            _ => unreachable!("invalid char"),
+    /// Panics if `byte` is not `"` or `'`.
+    fn from_byte(byte: u8) -> QuoteKind {
+        match byte {
+            b'"' => QuoteKind::Double,
+            b'\'' => QuoteKind::Single,
+            _ => unreachable!("invalid byte"),
         }
     }
 }
@@ -179,8 +179,7 @@ struct Info {
     kind: Kind,
     /// Whether this is a start tag (`<` not followed by `/`).
     start_tag: bool,
-    /// Used depending on `kind` to either collect all parsed characters, or to
-    /// store expected characters.
+    /// Used depending on `kind` to collect all parsed bytes.
     start: Option<Point>,
     /// Collected index, for various reasons.
     size: usize,
@@ -225,7 +224,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 ///     ^
 /// ```
 fn before(tokenizer: &mut Tokenizer) -> State {
-    if Some('<') == tokenizer.current {
+    if Some(b'<') == tokenizer.current {
         tokenizer.enter(Token::HtmlFlowData);
         tokenizer.consume();
         State::Fn(Box::new(open))
@@ -256,16 +255,16 @@ fn open(tokenizer: &mut Tokenizer) -> State {
     };
 
     match tokenizer.current {
-        Some('!') => {
+        Some(b'!') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| declaration_open(t, info)))
         }
-        Some('/') => {
+        Some(b'/') => {
             tokenizer.consume();
             info.start = Some(tokenizer.point.clone());
             State::Fn(Box::new(|t| tag_close_start(t, info)))
         }
-        Some('?') => {
+        Some(b'?') => {
             info.kind = Kind::Instruction;
             tokenizer.consume();
             // Do not form containers.
@@ -274,7 +273,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
             // right now, so we do need to search for `>`, similar to declarations.
             State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
         }
-        Some('A'..='Z' | 'a'..='z') => {
+        Some(b'A'..=b'Z' | b'a'..=b'z') => {
             info.start_tag = true;
             info.start = Some(tokenizer.point.clone());
             tag_name(tokenizer, info)
@@ -295,18 +294,18 @@ fn open(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
     match tokenizer.current {
-        Some('-') => {
+        Some(b'-') => {
             tokenizer.consume();
             info.kind = Kind::Comment;
             State::Fn(Box::new(|t| comment_open_inside(t, info)))
         }
-        Some('[') => {
+        Some(b'[') => {
             tokenizer.consume();
             info.kind = Kind::Cdata;
             info.size = 0;
             State::Fn(Box::new(|t| cdata_open_inside(t, info)))
         }
-        Some('A'..='Z' | 'a'..='z') => {
+        Some(b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
             info.kind = Kind::Declaration;
             // Do not form containers.
@@ -325,7 +324,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
 /// ```
 fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('-') => {
+        Some(b'-') => {
             tokenizer.consume();
             // Do not form containers.
             tokenizer.concrete = true;
@@ -343,7 +342,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
     match tokenizer.current {
-        Some(char) if char == CDATA_SEARCH[info.size] => {
+        Some(byte) if byte == CDATA_SEARCH[info.size] => {
             info.size += 1;
             tokenizer.consume();
@@ -368,7 +367,7 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
 /// ```
 fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('A'..='Z' | 'a'..='z') => {
+        Some(b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| tag_name(t, info)))
         }
@@ -386,11 +385,11 @@ fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
     match tokenizer.current {
-        None | Some('\t' | '\n' | ' ' | '/' | '>') => {
-            let slash = matches!(tokenizer.current, Some('/'));
+        None | Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => {
+            let slash = matches!(tokenizer.current, Some(b'/'));
             let start = info.start.take().unwrap();
             let name = Slice::from_position(
-                &tokenizer.parse_state.chars,
+                tokenizer.parse_state.bytes,
                 &Position {
                     start: &start,
                     end: &tokenizer.point,
@@ -428,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
                 }
             }
         }
-        Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+        Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| tag_name(t, info)))
         }
@@ -444,7 +443,7 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
 /// ```
 fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('>') => {
+        Some(b'>') => {
             tokenizer.consume();
             // Do not form containers.
             tokenizer.concrete = true;
@@ -462,7 +461,7 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('\t' | ' ') => {
+        Some(b'\t' | b' ') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_closing_tag_after(t, info)))
         }
@@ -491,15 +490,15 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('/') => {
+        Some(b'/') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_end(t, info)))
         }
-        Some('0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+        Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_attribute_name(t, info)))
         }
-        Some('\t' | ' ') => {
+        Some(b'\t' | b' ') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_attribute_name_before(t, info)))
         }
@@ -519,7 +518,7 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> Stat
 /// ```
 fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+        Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_attribute_name(t, info)))
         }
@@ -538,11 +537,11 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('=') => {
+        Some(b'=') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
         }
-        Some('\t' | ' ') => {
+        Some(b'\t' | b' ') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_attribute_name_after(t, info)))
         }
@@ -561,13 +560,13 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State
 /// ```
 fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) -> State {
     match tokenizer.current {
-        None | Some('<' | '=' | '>' | '`') => State::Nok,
-        Some(char) if matches!(char, '"' | '\'') => {
-            info.quote = Some(QuoteKind::from_char(char));
+        None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,
+        Some(byte) if matches!(byte, b'"' | b'\'') => {
+            info.quote = Some(QuoteKind::from_byte(byte));
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_attribute_value_quoted(t, info)))
         }
-        Some('\t' | ' ') => {
+        Some(b'\t' | b' ') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
         }
@@ -585,8 +584,8 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) ->
 /// ```
 fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => State::Nok,
-        Some(char) if char == info.quote.as_ref().unwrap().as_char() => {
+        None | Some(b'\n') => State::Nok,
+        Some(byte) if byte == info.quote.as_ref().unwrap().as_byte() => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_attribute_value_quoted_after(t, info)))
         }
@@ -605,7 +604,7 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> Sta
 /// ```
 fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\t' | '\n' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => {
+        None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => {
             complete_attribute_name_after(tokenizer, info)
         }
         Some(_) => {
@@ -624,7 +623,7 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> S
 /// ```
 fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('\t' | ' ' | '/' | '>') => complete_attribute_name_before(tokenizer, info),
+        Some(b'\t' | b' ' | b'/' | b'>') => complete_attribute_name_before(tokenizer, info),
         _ => State::Nok,
     }
 }
@@ -637,7 +636,7 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info)
 /// ```
 fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('>') => {
+        Some(b'>') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_after(t, info)))
         }
@@ -653,12 +652,12 @@ fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             // Do not form containers.
             tokenizer.concrete = true;
             continuation(tokenizer, info)
         }
-        Some('\t' | ' ') => {
+        Some(b'\t' | b' ') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| complete_after(t, info)))
         }
@@ -674,27 +673,27 @@ fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('-') if info.kind == Kind::Comment => {
+        Some(b'-') if info.kind == Kind::Comment => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_comment_inside(t, info)))
         }
-        Some('<') if info.kind == Kind::Raw => {
+        Some(b'<') if info.kind == Kind::Raw => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_raw_tag_open(t, info)))
        }
-        Some('>') if info.kind == Kind::Declaration => {
+        Some(b'>') if info.kind == Kind::Declaration => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_close(t, info)))
         }
-        Some('?') if info.kind == Kind::Instruction => {
+        Some(b'?') if info.kind == Kind::Instruction => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
         }
-        Some(']') if info.kind == Kind::Cdata => {
+        Some(b']') if info.kind == Kind::Cdata => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_character_data_inside(t, info)))
        }
-        Some('\n') if info.kind == Kind::Basic || info.kind == Kind::Complete => {
+        Some(b'\n') if info.kind == Kind::Basic || info.kind == Kind::Complete => {
             tokenizer.exit(Token::HtmlFlowData);
             tokenizer.check(blank_line_before, |ok| {
                 if ok {
@@ -704,7 +703,7 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
                 }
             })(tokenizer)
         }
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::HtmlFlowData);
             continuation_start(tokenizer, info)
         }
@@ -741,7 +740,7 @@ fn continuation_start(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('\n') => {
+        Some(b'\n') => {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume();
             tokenizer.exit(Token::LineEnding);
@@ -760,7 +759,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => continuation_start(tokenizer, info),
+        None | Some(b'\n') => continuation_start(tokenizer, info),
         _ => {
             tokenizer.enter(Token::HtmlFlowData);
             continuation(tokenizer, info)
@@ -776,7 +775,7 @@ fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('-') => {
+        Some(b'-') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
         }
@@ -792,7 +791,7 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
 /// ```
 fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
     match tokenizer.current {
-        Some('/') => {
+        Some(b'/') => {
             tokenizer.consume();
             info.start = Some(tokenizer.point.clone());
             State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
@@ -809,12 +808,12 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State
 /// ```
 fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State {
     match tokenizer.current {
-        Some('>') => {
+        Some(b'>') => {
             info.size = 0;
 
             let start = info.start.take().unwrap();
             let name = Slice::from_position(
-                &tokenizer.parse_state.chars,
+                tokenizer.parse_state.bytes,
                 &Position {
                     start: &start,
                     end: &tokenizer.point,
@@ -830,7 +829,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State
                 continuation(tokenizer, info)
             }
         }
-        Some('A'..='Z' | 'a'..='z') if info.size < HTML_RAW_SIZE_MAX => {
+        Some(b'A'..=b'Z' | b'a'..=b'z') if info.size < HTML_RAW_SIZE_MAX => {
             tokenizer.consume();
             info.size += 1;
             State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
@@ -850,7 +849,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State
 /// ```
 fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some(']') => {
+        Some(b']') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
         }
@@ -874,11 +873,11 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) ->
 /// ```
 fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        Some('>') => {
+        Some(b'>') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_close(t, info)))
         }
-        Some('-') if info.kind == Kind::Comment => {
+        Some(b'-') if info.kind == Kind::Comment => {
             tokenizer.consume();
             State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
         }
@@ -894,7 +893,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> Sta
 /// ```
 fn continuation_close(tokenizer: &mut Tokenizer, info: Info) -> State {
     match tokenizer.current {
-        None | Some('\n') => {
+        None | Some(b'\n') => {
             tokenizer.exit(Token::HtmlFlowData);
             continuation_after(tokenizer)
         }
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index b1ad113..f10a476 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -58,7 +58,7 @@ use crate::construct::partial_space_or_tab::space_or_tab;
 use crate::token::Token;
 use crate::tokenizer::{State, StateFn, Tokenizer};
 
-const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
+const CDATA_SEARCH: [u8; 6] = [b'C', b'D', b'A', b'T', b'A', b'['];
 
 /// Start of HTML (text)
 ///
@@ -67,7 +67,7 @@ const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
 ///       ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    if Some('<') == tokenizer.current && tokenizer.parse_state.constructs.html_text {
+    if Some(b'<') == tokenizer.current && tokenizer.parse_state.constructs.html_text {
         tokenizer.enter(Token::HtmlText);
         tokenizer.enter(Token::HtmlTextData);
         tokenizer.consume();
@@ -89,19 +89,19 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('!') => {
+        Some(b'!') => {
             tokenizer.consume();
             State::Fn(Box::new(declaration_open))
         }
-        Some('/') => {
+        Some(b'/') => {
             tokenizer.consume();
             State::Fn(Box::new(tag_close_start))
         }
-        Some('?') => {
+        Some(b'?') => {
             tokenizer.consume();
             State::Fn(Box::new(instruction))
         }
-        Some('A'..='Z' | 'a'..='z') => {
+        Some(b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
             State::Fn(Box::new(tag_open))
         }
@@ -121,15 +121,15 @@ fn open(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn declaration_open(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('-') => {
+        Some(b'-') => {
             tokenizer.consume();
             State::Fn(Box::new(comment_open_inside))
         }
-        Some('[') => {
+        Some(b'[') => {
             tokenizer.consume();
             State::Fn(Box::new(|t| cdata_open_inside(t, 0)))
         }
-        Some('A'..='Z' | 'a'..='z') => {
+        Some(b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
             State::Fn(Box::new(declaration))
         }
@@ -145,7 +145,7 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('-') => {
+        Some(b'-') => {
             tokenizer.consume();
             State::Fn(Box::new(comment_start))
         }
@@ -168,8 +168,8 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
 /// [html_flow]: crate::construct::html_flow
 fn comment_start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('>') => State::Nok,
-        Some('-') => {
+        None | Some(b'>') => State::Nok,
+        Some(b'-') => {
             tokenizer.consume();
             State::Fn(Box::new(comment_start_dash))
         }
@@ -192,7 +192,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State {
 /// [html_flow]: crate::construct::html_flow
 fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('>') => State::Nok,
+        None | Some(b'>') => State::Nok,
         _ => comment(tokenizer),
     }
 }
@@ -206,8 +206,8 @@ fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
 fn comment(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => State::Nok,
-        Some('\n') => at_line_ending(tokenizer, Box::new(comment)),
-        Some('-') => {
+        Some(b'\n') => at_line_ending(tokenizer, Box::new(comment)),
+        Some(b'-') => {
             tokenizer.consume();
             State::Fn(Box::new(comment_close))
         }
@@ -226,7 +226,7 @@ fn comment(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn comment_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('-') => {
+        Some(b'-') => {
             tokenizer.consume();
             State::Fn(Box::new(end))
         }
@@ -242,7 +242,7 @@ fn comment_close(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn cdata_open_inside(tokenizer: &mut Tokenizer, index: usize) -> State {
     match tokenizer.current {
-        Some(char) if char == CDATA_SEARCH[index] => {
+        Some(byte) if byte == CDATA_SEARCH[index] => {
             tokenizer.consume();
 
             if index + 1 == CDATA_SEARCH.len() {
@@ -264,8 +264,8 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, index: usize) -> State {
 fn cdata(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => State::Nok,
-        Some('\n') => at_line_ending(tokenizer, Box::new(cdata)),
-        Some(']') => {
+        Some(b'\n') => at_line_ending(tokenizer, Box::new(cdata)),
+        Some(b']') => {
             tokenizer.consume();
             State::Fn(Box::new(cdata_close))
         }
@@ -284,7 +284,7 @@ fn cdata(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn cdata_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(']') => {
+        Some(b']') => {
             tokenizer.consume();
             State::Fn(Box::new(cdata_end))
         }
@@ -300,8 +300,8 @@ fn cdata_close(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn cdata_end(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('>') => end(tokenizer),
-        Some(']') => cdata_close(tokenizer),
+        Some(b'>') => end(tokenizer),
+        Some(b']') => cdata_close(tokenizer),
         _ => cdata(tokenizer),
     }
 }
@@ -314,8 +314,8 @@ fn cdata_end(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn declaration(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None | Some('>') => end(tokenizer),
-        Some('\n') => at_line_ending(tokenizer, Box::new(declaration)),
+        None | Some(b'>') => end(tokenizer),
+        Some(b'\n') => at_line_ending(tokenizer, Box::new(declaration)),
         _ => {
             tokenizer.consume();
             State::Fn(Box::new(declaration))
@@ -332,8 +332,8 @@ fn declaration(tokenizer: &mut Tokenizer) -> State {
 fn instruction(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => State::Nok,
-        Some('\n') => at_line_ending(tokenizer, Box::new(instruction)),
-        Some('?') => {
+        Some(b'\n') => at_line_ending(tokenizer, Box::new(instruction)),
+        Some(b'?') => {
             tokenizer.consume();
             State::Fn(Box::new(instruction_close))
         }
@@ -352,7 +352,7 @@ fn instruction(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn instruction_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('>') => end(tokenizer),
+        Some(b'>') => end(tokenizer),
         _ => instruction(tokenizer),
     }
 }
@@ -365,7 +365,7 @@ fn instruction_close(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('A'..='Z' | 'a'..='z') => {
+        Some(b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
             State::Fn(Box::new(tag_close))
         }
@@ -381,7 +381,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 fn tag_close(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+        Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
             tokenizer.consume();
             State::Fn(Box::new(tag_close))
         }
@@ -397,8 +397,8 @@ fn tag_close(tokenizer: &mut Tokenizer) -> State
{  /// ```  fn tag_close_between(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('\n') => at_line_ending(tokenizer, Box::new(tag_close_between)), -        Some('\t' | ' ') => { +        Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_close_between)), +        Some(b'\t' | b' ') => {              tokenizer.consume();              State::Fn(Box::new(tag_close_between))          } @@ -414,11 +414,11 @@ fn tag_close_between(tokenizer: &mut Tokenizer) -> State {  /// ```  fn tag_open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => { +        Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {              tokenizer.consume();              State::Fn(Box::new(tag_open))          } -        Some('\t' | '\n' | ' ' | '/' | '>') => tag_open_between(tokenizer), +        Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),          _ => State::Nok,      }  } @@ -431,16 +431,16 @@ fn tag_open(tokenizer: &mut Tokenizer) -> State {  /// ```  fn tag_open_between(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_between)), -        Some('\t' | ' ') => { +        Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_between)), +        Some(b'\t' | b' ') => {              tokenizer.consume();              State::Fn(Box::new(tag_open_between))          } -        Some('/') => { +        Some(b'/') => {              tokenizer.consume();              State::Fn(Box::new(end))          } -        Some(':' | 'A'..='Z' | '_' | 'a'..='z') => { +        Some(b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {              tokenizer.consume();              State::Fn(Box::new(tag_open_attribute_name))          } @@ -456,7 +456,7 @@ fn tag_open_between(tokenizer: &mut Tokenizer) -> State {  /// ```  fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => { +        Some(b'-' | b'.' 
| b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {              tokenizer.consume();              State::Fn(Box::new(tag_open_attribute_name))          } @@ -473,12 +473,12 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {  /// ```  fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_name_after)), -        Some('\t' | ' ') => { +        Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_name_after)), +        Some(b'\t' | b' ') => {              tokenizer.consume();              State::Fn(Box::new(tag_open_attribute_name_after))          } -        Some('=') => { +        Some(b'=') => {              tokenizer.consume();              State::Fn(Box::new(tag_open_attribute_value_before))          } @@ -495,15 +495,15 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {  /// ```  fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        None | Some('<' | '=' | '>' | '`') => State::Nok, -        Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_value_before)), -        Some('\t' | ' ') => { +        None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok, +        Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_value_before)), +        Some(b'\t' | b' ') => {              tokenizer.consume();              State::Fn(Box::new(tag_open_attribute_value_before))          } -        Some(char) if char == '"' || char == '\'' => { +        Some(byte) if byte == b'"' || byte == b'\'' => {              tokenizer.consume(); -            State::Fn(Box::new(move |t| tag_open_attribute_value_quoted(t, char))) +            State::Fn(Box::new(move |t| tag_open_attribute_value_quoted(t, byte)))          }          Some(_) => {              tokenizer.consume(); @@ -518,14 +518,14 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {  /// > | a <b c="d"> e  ///             ^  /// ``` -fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: char) -> State { +fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: u8) -> State {      match tokenizer.current {          None => State::Nok, -        Some('\n') => at_line_ending( +        Some(b'\n') => at_line_ending(              tokenizer,              Box::new(move |t| tag_open_attribute_value_quoted(t, marker)),          ), -        Some(char) if char == marker => { +        Some(byte) if byte == marker => {              tokenizer.consume();              State::Fn(Box::new(tag_open_attribute_value_quoted_after))          } @@ -546,8 +546,8 @@ fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: char) -> S  /// ```  fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        None | Some('"' | '\'' | '<' | '=' | '`') => State::Nok, -        Some('\t' | '\n' | ' ' | '/' | '>') => tag_open_between(tokenizer), +        None | Some(b'"' | b'\'' | b'<' | b'=' | b'`') => State::Nok, +        Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),          Some(_) => {              tokenizer.consume();              State::Fn(Box::new(tag_open_attribute_value_unquoted)) @@ -564,7 +564,7 @@ fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {  /// ```  fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {      
match tokenizer.current { -        Some('\t' | '\n' | ' ' | '>' | '/') => tag_open_between(tokenizer), +        Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => tag_open_between(tokenizer),          _ => State::Nok,      }  } @@ -577,7 +577,7 @@ fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {  /// ```  fn end(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('>') => { +        Some(b'>') => {              tokenizer.consume();              tokenizer.exit(Token::HtmlTextData);              tokenizer.exit(Token::HtmlText); @@ -599,7 +599,7 @@ fn end(tokenizer: &mut Tokenizer) -> State {  /// ```  fn at_line_ending(tokenizer: &mut Tokenizer, return_state: Box<StateFn>) -> State {      match tokenizer.current { -        Some('\n') => { +        Some(b'\n') => {              tokenizer.exit(Token::HtmlTextData);              tokenizer.enter(Token::LineEnding);              tokenizer.consume(); diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 5ea788f..6399f81 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -182,7 +182,7 @@ struct Info {  /// > | [a] b  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State { -    if Some(']') == tokenizer.current && tokenizer.parse_state.constructs.label_end { +    if Some(b']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {          let mut label_start_index = None;          let mut index = tokenizer.label_start_stack.len(); @@ -217,7 +217,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {                      // To do: virtual spaces not needed, create a `to_str`?                      id: normalize_identifier(                          &Slice::from_position( -                            &tokenizer.parse_state.chars, +                            tokenizer.parse_state.bytes,                              &Position {                                  start: &tokenizer.events[label_start.start.1].point,                                  end: &tokenizer.events[label_end_start - 1].point, @@ -258,7 +258,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {      match tokenizer.current {          // Resource (`[asd](fgh)`)? -        Some('(') => tokenizer.attempt(resource, move |is_ok| { +        Some(b'(') => tokenizer.attempt(resource, move |is_ok| {              Box::new(move |t| {                  // Also fine if `defined`, as then it's a valid shortcut.                  if is_ok || defined { @@ -269,7 +269,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {              })          })(tokenizer),          // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? 
-        Some('[') => tokenizer.attempt(full_reference, move |is_ok| { +        Some(b'[') => tokenizer.attempt(full_reference, move |is_ok| {              Box::new(move |t| {                  if is_ok {                      ok(t, info) @@ -382,7 +382,7 @@ fn nok(tokenizer: &mut Tokenizer, label_start_index: usize) -> State {  /// ```  fn resource(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('(') => { +        Some(b'(') => {              tokenizer.enter(Token::Resource);              tokenizer.enter(Token::ResourceMarker);              tokenizer.consume(); @@ -411,7 +411,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State {  /// ```  fn resource_open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some(')') => resource_end(tokenizer), +        Some(b')') => resource_end(tokenizer),          _ => tokenizer.go(              |t| {                  destination( @@ -451,7 +451,7 @@ fn destination_after(tokenizer: &mut Tokenizer) -> State {  /// ```  fn resource_between(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('"' | '\'' | '(') => tokenizer.go( +        Some(b'"' | b'\'' | b'(') => tokenizer.go(              |t| {                  title(                      t, @@ -486,7 +486,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {  /// ```  fn resource_end(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some(')') => { +        Some(b')') => {              tokenizer.enter(Token::ResourceMarker);              tokenizer.consume();              tokenizer.exit(Token::ResourceMarker); @@ -505,7 +505,7 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State {  /// ```  fn full_reference(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('[') => tokenizer.go( +        Some(b'[') => tokenizer.go(              |t| {                  label(                      t, @@ -537,7 +537,7 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {      // To do: virtual spaces not needed, create a `to_str`?      
let id = Slice::from_position( -        &tokenizer.parse_state.chars, +        tokenizer.parse_state.bytes,          &Position::from_exit_event(&tokenizer.events, end),      )      .serialize(); @@ -563,7 +563,7 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {  /// ```  fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('[') => { +        Some(b'[') => {              tokenizer.enter(Token::Reference);              tokenizer.enter(Token::ReferenceMarker);              tokenizer.consume(); @@ -584,7 +584,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {  /// ```  fn collapsed_reference_open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some(']') => { +        Some(b']') => {              tokenizer.enter(Token::ReferenceMarker);              tokenizer.consume();              tokenizer.exit(Token::ReferenceMarker); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 078026d..d30b8dd 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -40,7 +40,7 @@ use crate::tokenizer::{LabelStart, State, Tokenizer};  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('!') if tokenizer.parse_state.constructs.label_start_image => { +        Some(b'!') if tokenizer.parse_state.constructs.label_start_image => {              tokenizer.enter(Token::LabelImage);              tokenizer.enter(Token::LabelImageMarker);              tokenizer.consume(); @@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// ```  pub fn open(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('[') => { +        Some(b'[') => {              tokenizer.enter(Token::LabelMarker);              tokenizer.consume();              tokenizer.exit(Token::LabelMarker); diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index d7ae1d6..c47941c 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -39,7 +39,7 @@ use crate::tokenizer::{LabelStart, State, Tokenizer};  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('[') if tokenizer.parse_state.constructs.label_start_link => { +        Some(b'[') if tokenizer.parse_state.constructs.label_start_link => {              let start = tokenizer.events.len();              tokenizer.enter(Token::LabelLink);              tokenizer.enter(Token::LabelMarker); diff --git a/src/construct/list.rs b/src/construct/list.rs index 355eeee..9b59130 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -102,19 +102,19 @@ enum Kind {  }  impl Kind { -    /// Turn a [char] into a kind. +    /// Turn a byte ([u8]) into a kind.      ///      /// ## Panics      /// -    /// Panics if `char` is not `.`, `)`, `*`, `+`, or `-`. -    fn from_char(char: char) -> Kind { -        match char { -            '.' => Kind::Dot, -            ')' => Kind::Paren, -            '*' => Kind::Asterisk, -            '+' => Kind::Plus, -            '-' => Kind::Dash, -            _ => unreachable!("invalid char"), +    /// Panics if `byte` is not `.`, `)`, `*`, `+`, or `-`. +    fn from_byte(byte: u8) -> Kind { +        match byte { +            b'.' 
=> Kind::Dot, +            b')' => Kind::Paren, +            b'*' => Kind::Asterisk, +            b'+' => Kind::Plus, +            b'-' => Kind::Dash, +            _ => unreachable!("invalid byte"),          }      }  } @@ -149,11 +149,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  fn before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current {          // Unordered. -        Some('*' | '+' | '-') => tokenizer.check(thematic_break, |ok| { +        Some(b'*' | b'+' | b'-') => tokenizer.check(thematic_break, |ok| {              Box::new(if ok { nok } else { before_unordered })          })(tokenizer),          // Ordered. -        Some(char) if char.is_ascii_digit() && (!tokenizer.interrupt || char == '1') => { +        Some(byte) if byte.is_ascii_digit() && (!tokenizer.interrupt || byte == b'1') => {              tokenizer.enter(Token::ListItemPrefix);              tokenizer.enter(Token::ListItemValue);              inside(tokenizer, 0) @@ -183,11 +183,11 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {  /// ```  fn inside(tokenizer: &mut Tokenizer, size: usize) -> State {      match tokenizer.current { -        Some(char) if char.is_ascii_digit() && size + 1 < LIST_ITEM_VALUE_SIZE_MAX => { +        Some(byte) if byte.is_ascii_digit() && size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {              tokenizer.consume();              State::Fn(Box::new(move |t| inside(t, size + 1)))          } -        Some('.' | ')') if !tokenizer.interrupt || size < 2 => { +        Some(b'.' | b')') if !tokenizer.interrupt || size < 2 => {              tokenizer.exit(Token::ListItemValue);              marker(tokenizer)          } @@ -262,7 +262,7 @@ fn whitespace(tokenizer: &mut Tokenizer) -> State {  ///      ^  /// ```  fn whitespace_after(tokenizer: &mut Tokenizer) -> State { -    if matches!(tokenizer.current, Some('\t' | ' ')) { +    if matches!(tokenizer.current, Some(b'\t' | b' ')) {          State::Nok      } else {          State::Ok @@ -277,7 +277,7 @@ fn whitespace_after(tokenizer: &mut Tokenizer) -> State {  /// ```  fn prefix_other(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('\t' | ' ') => { +        Some(b'\t' | b' ') => {              tokenizer.enter(Token::SpaceOrTab);              tokenizer.consume();              tokenizer.exit(Token::SpaceOrTab); @@ -303,7 +303,7 @@ fn after(tokenizer: &mut Tokenizer, blank: bool) -> State {              &[Token::ListItem],          );          let mut prefix = Slice::from_position( -            &tokenizer.parse_state.chars, +            tokenizer.parse_state.bytes,              &Position {                  start: &tokenizer.events[start].point,                  end: &tokenizer.point, @@ -400,13 +400,10 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) {              if event.event_type == EventType::Enter {                  let end = skip::opt(&tokenizer.events, index, &[Token::ListItem]) - 1;                  let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]); -                let kind = Kind::from_char( -                    Slice::from_point( -                        &tokenizer.parse_state.chars, -                        &tokenizer.events[marker].point, -                    ) -                    .head() -                    .unwrap(), +                let kind = Kind::from_byte( +                    Slice::from_point(tokenizer.parse_state.bytes, &tokenizer.events[marker].point) +                        .head() +                        .unwrap(),            
      );                  let current = (kind, balance, index, end); diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 569c609..7b50957 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -84,6 +84,7 @@ pub mod label_start_image;  pub mod label_start_link;  pub mod list;  pub mod paragraph; +pub mod partial_bom;  pub mod partial_data;  pub mod partial_destination;  pub mod partial_label; diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 5d230d3..146dc40 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -44,7 +44,7 @@ use crate::util::skip::opt as skip_opt;  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        None | Some('\n') => { +        None | Some(b'\n') => {              unreachable!("unexpected eol/eof")          }          _ => { @@ -63,7 +63,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// ```  fn inside(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        None | Some('\n') => { +        None | Some(b'\n') => {              tokenizer.exit(Token::Data);              tokenizer.exit(Token::Paragraph);              tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve)); diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs new file mode 100644 index 0000000..be8d6c8 --- /dev/null +++ b/src/construct/partial_bom.rs @@ -0,0 +1,54 @@ +//! To do. + +use crate::token::Token; +use crate::tokenizer::{State, Tokenizer}; + +/// Before a BOM. +/// +/// ```text +/// > | 0xEF 0xBB 0xBF +///     ^^^^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        Some(0xEF) => { +            tokenizer.enter(Token::ByteOrderMark); +            tokenizer.consume(); +            State::Fn(Box::new(cont)) +        } +        _ => State::Nok, +    } +} + +/// Second byte in BOM. +/// +/// ```text +/// > | 0xEF 0xBB 0xBF +///          ^^^^ +/// ``` +fn cont(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        Some(0xBB) => { +            tokenizer.consume(); +            State::Fn(Box::new(end)) +        } +        _ => State::Nok, +    } +} + +/// Last byte in BOM. 
+/// +/// ```text +/// > | 0xEF 0xBB 0xBF +///               ^^^^ +/// ``` +fn end(tokenizer: &mut Tokenizer) -> State { +    match tokenizer.current { +        Some(0xBF) => { +            tokenizer.consume(); +            tokenizer.exit(Token::ByteOrderMark); +            State::Ok +        } +        _ => State::Nok, +    } +} diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index 0b66b09..335d7ab 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -15,9 +15,9 @@ use crate::tokenizer::{EventType, State, Tokenizer};  /// > | abc  ///     ^  /// ``` -pub fn start(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State { +pub fn start(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {      match tokenizer.current { -        Some(char) if stop.contains(&char) => { +        Some(byte) if stop.contains(&byte) => {              tokenizer.enter(Token::Data);              tokenizer.consume();              State::Fn(Box::new(move |t| data(t, stop))) @@ -32,16 +32,16 @@ pub fn start(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {  /// > | abc  ///     ^  /// ``` -fn at_break(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State { +fn at_break(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {      match tokenizer.current {          None => State::Ok, -        Some('\n') => { +        Some(b'\n') => {              tokenizer.enter(Token::LineEnding);              tokenizer.consume();              tokenizer.exit(Token::LineEnding);              State::Fn(Box::new(move |t| at_break(t, stop)))          } -        Some(char) if stop.contains(&char) => { +        Some(byte) if stop.contains(&byte) => {              tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));              State::Ok          } @@ -58,10 +58,10 @@ fn at_break(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {  /// > | abc  ///     ^^^  /// ``` -fn data(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State { +fn data(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {      let done = match tokenizer.current { -        None | Some('\n') => true, -        Some(char) if stop.contains(&char) => true, +        None | Some(b'\n') => true, +        Some(byte) if stop.contains(&byte) => true,          _ => false,      }; diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs index 6447228..0a3721c 100644 --- a/src/construct/partial_destination.rs +++ b/src/construct/partial_destination.rs @@ -117,7 +117,7 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {      };      match tokenizer.current { -        Some('<') => { +        Some(b'<') => {              tokenizer.enter(info.options.destination.clone());              tokenizer.enter(info.options.literal.clone());              tokenizer.enter(info.options.marker.clone()); @@ -125,8 +125,8 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {              tokenizer.exit(info.options.marker.clone());              State::Fn(Box::new(|t| enclosed_before(t, info)))          } -        None | Some(' ' | ')') => State::Nok, -        Some(char) if char.is_ascii_control() => State::Nok, +        None | Some(b' ' | b')') => State::Nok, +        Some(byte) if byte != b'\0' && byte.is_ascii_control() => State::Nok,          Some(_) => {              tokenizer.enter(info.options.destination.clone());              tokenizer.enter(info.options.raw.clone()); @@ -144,7 +144,7 @@ pub fn 
start(tokenizer: &mut Tokenizer, options: Options) -> State {  ///      ^  /// ```  fn enclosed_before(tokenizer: &mut Tokenizer, info: Info) -> State { -    if let Some('>') = tokenizer.current { +    if let Some(b'>') = tokenizer.current {          tokenizer.enter(info.options.marker.clone());          tokenizer.consume();          tokenizer.exit(info.options.marker.clone()); @@ -166,13 +166,13 @@ fn enclosed_before(tokenizer: &mut Tokenizer, info: Info) -> State {  /// ```  fn enclosed(tokenizer: &mut Tokenizer, info: Info) -> State {      match tokenizer.current { -        Some('>') => { +        Some(b'>') => {              tokenizer.exit(Token::Data);              tokenizer.exit(info.options.string.clone());              enclosed_before(tokenizer, info)          } -        None | Some('\n' | '<') => State::Nok, -        Some('\\') => { +        None | Some(b'\n' | b'<') => State::Nok, +        Some(b'\\') => {              tokenizer.consume();              State::Fn(Box::new(|t| enclosed_escape(t, info)))          } @@ -191,7 +191,7 @@ fn enclosed(tokenizer: &mut Tokenizer, info: Info) -> State {  /// ```  fn enclosed_escape(tokenizer: &mut Tokenizer, info: Info) -> State {      match tokenizer.current { -        Some('<' | '>' | '\\') => { +        Some(b'<' | b'>' | b'\\') => {              tokenizer.consume();              State::Fn(Box::new(|t| enclosed(t, info)))          } @@ -207,7 +207,7 @@ fn enclosed_escape(tokenizer: &mut Tokenizer, info: Info) -> State {  /// ```  fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {      match tokenizer.current { -        Some('(') => { +        Some(b'(') => {              if info.balance >= info.options.limit {                  State::Nok              } else { @@ -216,7 +216,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {                  State::Fn(Box::new(move |t| raw(t, info)))              }          } -        Some(')') => { +        Some(b')') => {              if info.balance == 0 {                  tokenizer.exit(Token::Data);                  tokenizer.exit(info.options.string.clone()); @@ -229,7 +229,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {                  State::Fn(Box::new(move |t| raw(t, info)))              }          } -        None | Some('\t' | '\n' | ' ') => { +        None | Some(b'\t' | b'\n' | b' ') => {              if info.balance > 0 {                  State::Nok              } else { @@ -240,8 +240,8 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {                  State::Ok              }          } -        Some(char) if char.is_ascii_control() => State::Nok, -        Some('\\') => { +        Some(byte) if byte != b'\0' && byte.is_ascii_control() => State::Nok, +        Some(b'\\') => {              tokenizer.consume();              State::Fn(Box::new(move |t| raw_escape(t, info)))          } @@ -260,7 +260,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {  /// ```  fn raw_escape(tokenizer: &mut Tokenizer, info: Info) -> State {      match tokenizer.current { -        Some('(' | ')' | '\\') => { +        Some(b'(' | b')' | b'\\') => {              tokenizer.consume();              State::Fn(Box::new(move |t| raw(t, info)))          } diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index ee31533..7e40a2d 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -82,9 +82,9 @@ pub struct Options {  struct Info {      /// Whether we've seen our first `ChunkString`.      
connect: bool, -    /// Whether there are non-blank characters in the label. +    /// Whether there are non-blank bytes in the label.      data: bool, -    /// Number of characters in the label. +    /// Number of bytes in the label.      size: usize,      /// Configuration.      options: Options, @@ -98,7 +98,7 @@ struct Info {  /// ```  pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {      match tokenizer.current { -        Some('[') => { +        Some(b'[') => {              let info = Info {                  connect: false,                  data: false, @@ -124,10 +124,10 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {  /// ```  fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {      match tokenizer.current { -        None | Some('[') => State::Nok, -        Some(']') if !info.data => State::Nok, +        None | Some(b'[') => State::Nok, +        Some(b']') if !info.data => State::Nok,          _ if info.size > LINK_REFERENCE_SIZE_MAX => State::Nok, -        Some(']') => { +        Some(b']') => {              tokenizer.exit(info.options.string.clone());              tokenizer.enter(info.options.marker.clone());              tokenizer.consume(); @@ -135,7 +135,7 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {              tokenizer.exit(info.options.label);              State::Ok          } -        Some('\n') => tokenizer.go( +        Some(b'\n') => tokenizer.go(              space_or_tab_eol_with_options(EolOptions {                  content_type: Some(ContentType::String),                  connect: info.connect, @@ -168,7 +168,7 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {  /// ```  fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {      match tokenizer.current { -        None | Some('\n' | '[' | ']') => { +        None | Some(b'\n' | b'[' | b']') => {              tokenizer.exit(Token::Data);              at_break(tokenizer, info)          } @@ -176,12 +176,12 @@ fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {              tokenizer.exit(Token::Data);              at_break(tokenizer, info)          } -        Some('\t' | ' ') => { +        Some(b'\t' | b' ') => {              tokenizer.consume();              info.size += 1;              State::Fn(Box::new(|t| label(t, info)))          } -        Some('\\') => { +        Some(b'\\') => {              tokenizer.consume();              info.size += 1;              if !info.data { @@ -208,7 +208,7 @@ fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {  /// ```  fn escape(tokenizer: &mut Tokenizer, mut info: Info) -> State {      match tokenizer.current { -        Some('[' | '\\' | ']') => { +        Some(b'[' | b'\\' | b']') => {              tokenizer.consume();              info.size += 1;              State::Fn(Box::new(|t| label(t, info))) diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs index 068e30f..6005a6c 100644 --- a/src/construct/partial_non_lazy_continuation.rs +++ b/src/construct/partial_non_lazy_continuation.rs @@ -22,7 +22,7 @@ use crate::tokenizer::{State, Tokenizer};  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some('\n') => { +        Some(b'\n') => {              tokenizer.enter(Token::LineEnding);              tokenizer.consume();              tokenizer.exit(Token::LineEnding); diff --git a/src/construct/partial_space_or_tab.rs 
b/src/construct/partial_space_or_tab.rs index 6070ffe..f31cbc6 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -11,9 +11,9 @@ use crate::tokenizer::{ContentType, State, StateFn, Tokenizer};  /// Options to parse `space_or_tab`.  #[derive(Debug)]  pub struct Options { -    /// Minimum allowed characters (inclusive). +    /// Minimum allowed bytes (inclusive).      pub min: usize, -    /// Maximum allowed characters (inclusive). +    /// Maximum allowed bytes (inclusive).      pub max: usize,      /// Token type to use for whitespace events.      pub kind: Token, @@ -134,7 +134,7 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {  /// ```  fn start(tokenizer: &mut Tokenizer, mut info: Info) -> State {      match tokenizer.current { -        Some('\t' | ' ') if info.options.max > 0 => { +        Some(b'\t' | b' ') if info.options.max > 0 => {              tokenizer                  .enter_with_content(info.options.kind.clone(), info.options.content_type.clone()); @@ -165,7 +165,7 @@ fn start(tokenizer: &mut Tokenizer, mut info: Info) -> State {  /// ```  fn inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {      match tokenizer.current { -        Some('\t' | ' ') if info.size < info.options.max => { +        Some(b'\t' | b' ') if info.size < info.options.max => {              tokenizer.consume();              info.size += 1;              State::Fn(Box::new(|t| inside(t, info))) @@ -190,7 +190,7 @@ fn inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {  /// ```  fn after_space_or_tab(tokenizer: &mut Tokenizer, mut info: EolInfo) -> State {      match tokenizer.current { -        Some('\n') => { +        Some(b'\n') => {              tokenizer.enter_with_content(Token::LineEnding, info.options.content_type.clone());              if info.connect { @@ -239,7 +239,7 @@ fn after_eol(tokenizer: &mut Tokenizer, info: EolInfo) -> State {  /// ```  fn after_more_space_or_tab(tokenizer: &mut Tokenizer) -> State {      // Blank line not allowed. -    if matches!(tokenizer.current, None | Some('\n')) { +    if matches!(tokenizer.current, None | Some(b'\n')) {          State::Nok      } else {          State::Ok diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 15fc25e..80861af 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -78,29 +78,29 @@ enum Kind {  }  impl Kind { -    /// Turn the kind into a [char]. +    /// Turn the kind into a byte ([u8]).      ///      /// > 👉 **Note**: a closing paren is used for `Kind::Paren`. -    fn as_char(&self) -> char { +    fn as_byte(&self) -> u8 {          match self { -            Kind::Paren => ')', -            Kind::Double => '"', -            Kind::Single => '\'', +            Kind::Paren => b')', +            Kind::Double => b'"', +            Kind::Single => b'\'',          }      } -    /// Turn a [char] into a kind. +    /// Turn a byte ([u8]) into a kind.      ///      /// > 👉 **Note**: an opening paren must be used for `Kind::Paren`.      ///      /// ## Panics      /// -    /// Panics if `char` is not `(`, `"`, or `'`. -    fn from_char(char: char) -> Kind { -        match char { -            '(' => Kind::Paren, -            '"' => Kind::Double, -            '\'' => Kind::Single, -            _ => unreachable!("invalid char"), +    /// Panics if `byte` is not `(`, `"`, or `'`. 
+    fn from_byte(byte: u8) -> Kind { +        match byte { +            b'(' => Kind::Paren, +            b'"' => Kind::Double, +            b'\'' => Kind::Single, +            _ => unreachable!("invalid byte"),          }      }  } @@ -124,10 +124,10 @@ struct Info {  /// ```  pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {      match tokenizer.current { -        Some(char) if matches!(char, '"' | '\'' | '(') => { +        Some(byte) if matches!(byte, b'"' | b'\'' | b'(') => {              let info = Info {                  connect: false, -                kind: Kind::from_char(char), +                kind: Kind::from_byte(byte),                  options,              };              tokenizer.enter(info.options.title.clone()); @@ -150,7 +150,7 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {  /// ```  fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {      match tokenizer.current { -        Some(char) if char == info.kind.as_char() => { +        Some(byte) if byte == info.kind.as_byte() => {              tokenizer.enter(info.options.marker.clone());              tokenizer.consume();              tokenizer.exit(info.options.marker.clone()); @@ -172,12 +172,12 @@ fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {  /// ```  fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {      match tokenizer.current { -        Some(char) if char == info.kind.as_char() => { +        Some(byte) if byte == info.kind.as_byte() => {              tokenizer.exit(info.options.string.clone());              begin(tokenizer, info)          }          None => State::Nok, -        Some('\n') => tokenizer.go( +        Some(b'\n') => tokenizer.go(              space_or_tab_eol_with_options(EolOptions {                  content_type: Some(ContentType::String),                  connect: info.connect, @@ -210,15 +210,15 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {  /// ```  fn title(tokenizer: &mut Tokenizer, info: Info) -> State {      match tokenizer.current { -        Some(char) if char == info.kind.as_char() => { +        Some(byte) if byte == info.kind.as_byte() => {              tokenizer.exit(Token::Data);              at_break(tokenizer, info)          } -        None | Some('\n') => { +        None | Some(b'\n') => {              tokenizer.exit(Token::Data);              at_break(tokenizer, info)          } -        Some('\\') => { +        Some(b'\\') => {              tokenizer.consume();              State::Fn(Box::new(|t| escape(t, info)))          } @@ -237,7 +237,7 @@ fn title(tokenizer: &mut Tokenizer, info: Info) -> State {  /// ```  fn escape(tokenizer: &mut Tokenizer, info: Info) -> State {      match tokenizer.current { -        Some(char) if char == info.kind.as_char() => { +        Some(byte) if byte == info.kind.as_byte() => {              tokenizer.consume();              State::Fn(Box::new(|t| title(t, info)))          } diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs index 152824b..13815cb 100644 --- a/src/construct/partial_whitespace.rs +++ b/src/construct/partial_whitespace.rs @@ -86,25 +86,25 @@ fn trim_data(      hard_break: bool,  ) {      let mut slice = Slice::from_position( -        &tokenizer.parse_state.chars, +        tokenizer.parse_state.bytes,          &Position::from_exit_event(&tokenizer.events, exit_index),      );      if trim_end { -        let mut index = slice.chars.len(); +        let mut index = slice.bytes.len();          let vs = 
slice.after;          let mut spaces_only = vs == 0;          while index > 0 { -            match slice.chars[index - 1] { -                ' ' => {} -                '\t' => spaces_only = false, +            match slice.bytes[index - 1] { +                b' ' => {} +                b'\t' => spaces_only = false,                  _ => break,              }              index -= 1;          } -        let diff = slice.chars.len() - index; +        let diff = slice.bytes.len() - index;          let token_type = if spaces_only              && hard_break              && exit_index + 1 < tokenizer.events.len() @@ -150,16 +150,16 @@ fn trim_data(              );              tokenizer.events[exit_index].point = enter_point; -            slice.chars = &slice.chars[..index]; +            slice.bytes = &slice.bytes[..index];          }      }      if trim_start {          let mut index = 0;          let vs = slice.before; -        while index < slice.chars.len() { -            match slice.chars[index] { -                ' ' | '\t' => {} +        while index < slice.bytes.len() { +            match slice.bytes[index] { +                b' ' | b'\t' => {}                  _ => break,              } @@ -168,7 +168,7 @@ fn trim_data(          // The whole data is whitespace.          // We can be very fast: we only change the token types. -        if index == slice.chars.len() { +        if index == slice.bytes.len() {              tokenizer.events[exit_index - 1].token_type = Token::SpaceOrTab;              tokenizer.events[exit_index].token_type = Token::SpaceOrTab;              return; diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index bed454b..4fc4dc4 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -83,25 +83,25 @@ enum Kind {  }  impl Kind { -    /// Turn the kind into a [char]. -    fn as_char(&self) -> char { +    /// Turn the kind into a byte ([u8]). +    fn as_byte(&self) -> u8 {          match self { -            Kind::Asterisk => '*', -            Kind::Dash => '-', -            Kind::Underscore => '_', +            Kind::Asterisk => b'*', +            Kind::Dash => b'-', +            Kind::Underscore => b'_',          }      } -    /// Turn a [char] into a kind. +    /// Turn a byte ([u8]) into a kind.      ///      /// ## Panics      /// -    /// Panics if `char` is not `*`, `-`, or `_`. -    fn from_char(char: char) -> Kind { -        match char { -            '*' => Kind::Asterisk, -            '-' => Kind::Dash, -            '_' => Kind::Underscore, -            _ => unreachable!("invalid char"), +    /// Panics if `byte` is not `*`, `-`, or `_`. 
+    fn from_byte(byte: u8) -> Kind { +        match byte { +            b'*' => Kind::Asterisk, +            b'-' => Kind::Dash, +            b'_' => Kind::Underscore, +            _ => unreachable!("invalid byte"),          }      }  } @@ -144,10 +144,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// ```  fn before(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        Some(char) if matches!(char, '*' | '-' | '_') => at_break( +        Some(byte) if matches!(byte, b'*' | b'-' | b'_') => at_break(              tokenizer,              Info { -                kind: Kind::from_char(char), +                kind: Kind::from_byte(byte),                  size: 0,              },          ), @@ -163,13 +163,13 @@ fn before(tokenizer: &mut Tokenizer) -> State {  /// ```  fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {      match tokenizer.current { -        None | Some('\n' | '\r') if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => { +        None | Some(b'\n' | b'\r') if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {              tokenizer.exit(Token::ThematicBreak);              // Feel free to interrupt.              tokenizer.interrupt = false;              State::Ok          } -        Some(char) if char == info.kind.as_char() => { +        Some(byte) if byte == info.kind.as_byte() => {              tokenizer.enter(Token::ThematicBreakSequence);              sequence(tokenizer, info)          } @@ -185,7 +185,7 @@ fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {  /// ```  fn sequence(tokenizer: &mut Tokenizer, mut info: Info) -> State {      match tokenizer.current { -        Some(char) if char == info.kind.as_char() => { +        Some(byte) if byte == info.kind.as_byte() => {              tokenizer.consume();              info.size += 1;              State::Fn(Box::new(|t| sequence(t, info))) | 
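
The diffs to `list.rs`, `partial_title.rs`, and `thematic_break.rs` all repeat one move: marker kinds gain `as_byte`/`from_byte` conversions, and every match switches from `char` literals to `u8` literals. This is sound because each marker is a single ASCII byte, while UTF-8 continuation bytes are always `0x80` or higher, so a byte-level comparison can never mistake part of a multi-byte character for a marker. A minimal standalone sketch of the pattern (assumed names, not the crate's actual types):

```rust
/// Sketch of the marker-kind pattern used across the converted constructs.
#[derive(Debug, PartialEq)]
enum Marker {
    Asterisk,
    Dash,
    Underscore,
}

impl Marker {
    /// Turn the kind into a byte (`u8`).
    fn as_byte(&self) -> u8 {
        match self {
            Marker::Asterisk => b'*',
            Marker::Dash => b'-',
            Marker::Underscore => b'_',
        }
    }

    /// Turn a byte (`u8`) into a kind; panics on any other byte.
    fn from_byte(byte: u8) -> Marker {
        match byte {
            b'*' => Marker::Asterisk,
            b'-' => Marker::Dash,
            b'_' => Marker::Underscore,
            _ => unreachable!("invalid byte"),
        }
    }
}

fn main() {
    let input = b"***";
    // Classify the first byte, then measure the run of identical markers,
    // the same shape as `at_break`/`sequence` in `thematic_break.rs`.
    let kind = Marker::from_byte(input[0]);
    let size = input.iter().take_while(|&&b| b == kind.as_byte()).count();
    assert_eq!(kind, Marker::Asterisk);
    assert_eq!(size, 3);
}
```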
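
The newly added `partial_bom.rs` is the one construct that only becomes expressible with byte-level input: the UTF-8 byte order mark is the fixed sequence `0xEF 0xBB 0xBF` (it decodes to U+FEFF, but a byte tokenizer can consume it without decoding), and each of `start`, `cont`, and `end` accepts exactly one of those bytes or fails with `State::Nok`. A rough slice-level equivalent, under assumed names rather than the crate's API:

```rust
/// The UTF-8 byte order mark, as matched by the `partial_bom` states.
const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];

/// Strip a leading BOM, mirroring start -> cont -> end: each state in the
/// construct checks one byte of the mark, which collapses here into a
/// single prefix test on the raw input.
fn skip_bom(bytes: &[u8]) -> &[u8] {
    if bytes.starts_with(&BOM) {
        &bytes[BOM.len()..]
    } else {
        bytes
    }
}

fn main() {
    assert_eq!(skip_bom(b"\xEF\xBB\xBFhello"), b"hello");
    assert_eq!(skip_bom(b"hello"), b"hello");
}
```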
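
Likewise, `trim_data` in `partial_whitespace.rs` now scans the data slice backwards over raw bytes, tracking whether the trailing run is spaces only (which feeds the hard-break decision) or includes tabs. A self-contained sketch of that backwards scan, with an assumed signature; the real function also accounts for virtual spaces and rewrites token events:

```rust
/// Trim trailing spaces/tabs from `bytes`, returning the trimmed slice,
/// how many bytes were dropped, and whether the run was spaces only.
fn trim_trailing(bytes: &[u8]) -> (&[u8], usize, bool) {
    let mut index = bytes.len();
    // Simplification: the diff seeds `spaces_only` from the virtual-space
    // count (`vs == 0`); here we assume no virtual spaces.
    let mut spaces_only = true;
    while index > 0 {
        match bytes[index - 1] {
            b' ' => {}
            b'\t' => spaces_only = false,
            _ => break,
        }
        index -= 1;
    }
    (&bytes[..index], bytes.len() - index, spaces_only)
}

fn main() {
    // Two trailing spaces: a hard-break candidate.
    assert_eq!(trim_trailing(b"abc  "), (&b"abc"[..], 2, true));
    // A tab in the trailing run disqualifies the spaces-only flag.
    assert_eq!(trim_trailing(b"abc \t"), (&b"abc"[..], 2, false));
}
```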
