diff options
Diffstat (limited to '')
-rw-r--r-- | src/content/document.rs | 39 | ||||
-rw-r--r-- | src/content/flow.rs | 14 | ||||
-rw-r--r-- | src/content/string.rs | 6 | ||||
-rw-r--r-- | src/content/text.rs | 24 |
4 files changed, 52 insertions, 31 deletions
diff --git a/src/content/document.rs b/src/content/document.rs index 32b32ba..2924f6c 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -17,12 +17,12 @@ use crate::parser::ParseState; use crate::subtokenize::subtokenize; use crate::token::Token; use crate::tokenizer::{ - Code, Container, ContainerState, Event, EventType, Point, State, StateFn, Tokenizer, + Container, ContainerState, Event, EventType, Point, State, StateFn, Tokenizer, }; use crate::util::{ normalize_identifier::normalize_identifier, skip, - span::{from_exit_event, serialize}, + slice::{Position, Slice}, }; /// Phases where we can exit containers. @@ -78,7 +78,7 @@ struct DocumentInfo { pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { let mut tokenizer = Tokenizer::new(point, parse_state); - let state = tokenizer.push(0, parse_state.codes.len(), Box::new(start)); + let state = tokenizer.push(0, parse_state.chars.len(), Box::new(before)); tokenizer.flush(state, true); let mut index = 0; @@ -88,13 +88,14 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { let event = &tokenizer.events[index]; if event.event_type == EventType::Exit && event.token_type == Token::DefinitionLabelString { + // To do: when we operate on u8, we can use a `to_str` here as we + // don‘t need virtual spaces. let id = normalize_identifier( - serialize( - &parse_state.codes, - &from_exit_event(&tokenizer.events, index), - false, + &Slice::from_position( + &tokenizer.parse_state.chars, + &Position::from_exit_event(&tokenizer.events, index), ) - .as_str(), + .serialize(), ); if !definitions.contains(&id) { @@ -114,6 +115,26 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> { events } +/// At the beginning. +/// +/// Perhaps a BOM? +/// +/// ```markdown +/// > | a +/// ^ +/// ``` +fn before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some('\u{FEFF}') => { + tokenizer.enter(Token::ByteOrderMark); + tokenizer.consume(); + tokenizer.exit(Token::ByteOrderMark); + State::Fn(Box::new(start)) + } + _ => start(tokenizer), + } +} + /// Before document. // /// ```markdown @@ -337,7 +358,7 @@ fn containers_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State // Parse flow, pausing after eols. tokenizer.go_until( state, - |code| matches!(code, Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')), + |code| matches!(code, Some('\n')), move |state| Box::new(move |t| flow_end(t, info, state)), )(tokenizer) } diff --git a/src/content/flow.rs b/src/content/flow.rs index ea09cd9..09c4e2c 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -27,7 +27,7 @@ use crate::construct::{ thematic_break::start as thematic_break, }; use crate::token::Token; -use crate::tokenizer::{Code, State, Tokenizer}; +use crate::tokenizer::{State, Tokenizer}; /// Before flow. /// @@ -41,7 +41,7 @@ use crate::tokenizer::{Code, State, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::None => State::Ok, + None => State::Ok, _ => tokenizer.attempt(blank_line, |ok| { Box::new(if ok { blank_line_after } else { initial_before }) })(tokenizer), @@ -62,7 +62,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` fn initial_before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::None => State::Ok, + None => State::Ok, _ => tokenizer.attempt_n( vec![ Box::new(code_indented), @@ -87,8 +87,8 @@ fn initial_before(tokenizer: &mut Tokenizer) -> State { /// ``` fn blank_line_after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::None => State::Ok, - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + None => State::Ok, + Some('\n') => { tokenizer.enter(Token::BlankLineEnding); tokenizer.consume(); tokenizer.exit(Token::BlankLineEnding); @@ -111,8 +111,8 @@ fn blank_line_after(tokenizer: &mut Tokenizer) -> State { /// ``` fn after(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::None => State::Ok, - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + None => State::Ok, + Some('\n') => { tokenizer.enter(Token::LineEnding); tokenizer.consume(); tokenizer.exit(Token::LineEnding); diff --git a/src/content/string.rs b/src/content/string.rs index c6c0094..8bc2b91 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -16,9 +16,9 @@ use crate::construct::{ character_escape::start as character_escape, character_reference::start as character_reference, partial_data::start as data, partial_whitespace::create_resolve_whitespace, }; -use crate::tokenizer::{Code, State, Tokenizer}; +use crate::tokenizer::{State, Tokenizer}; -const MARKERS: [Code; 2] = [Code::Char('&'), Code::Char('\\')]; +const MARKERS: [char; 2] = ['&', '\\']; /// Start of string. pub fn start(tokenizer: &mut Tokenizer) -> State { @@ -32,7 +32,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// Before string. fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::None => State::Ok, + None => State::Ok, _ => tokenizer.attempt_n( vec![Box::new(character_reference), Box::new(character_escape)], |ok| Box::new(if ok { before } else { before_data }), diff --git a/src/content/text.rs b/src/content/text.rs index 4248053..ebdf888 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -28,18 +28,18 @@ use crate::construct::{ label_start_image::start as label_start_image, label_start_link::start as label_start_link, partial_data::start as data, partial_whitespace::create_resolve_whitespace, }; -use crate::tokenizer::{Code, State, Tokenizer}; +use crate::tokenizer::{State, Tokenizer}; -const MARKERS: [Code; 9] = [ - Code::Char('!'), // `label_start_image` - Code::Char('&'), // `character_reference` - Code::Char('*'), // `attention` - Code::Char('<'), // `autolink`, `html_text` - Code::Char('['), // `label_start_link` - Code::Char('\\'), // `character_escape`, `hard_break_escape` - Code::Char(']'), // `label_end` - Code::Char('_'), // `attention` - Code::Char('`'), // `code_text` +const MARKERS: [char; 9] = [ + '!', // `label_start_image` + '&', // `character_reference` + '*', // `attention` + '<', // `autolink`, `html_text` + '[', // `label_start_link` + '\\', // `character_escape`, `hard_break_escape` + ']', // `label_end` + '_', // `attention` + '`', // `code_text` ]; /// Start of text. @@ -57,7 +57,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// Before text. pub fn before(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::None => State::Ok, + None => State::Ok, _ => tokenizer.attempt_n( vec![ Box::new(attention), |