diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-28 16:48:00 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-28 16:48:00 +0200 |
commit | f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456 (patch) | |
tree | c1ac3f22473bd79566d835b2474d2ae9e00d6c55 /src/construct/label_end.rs | |
parent | d729b07712ca9cc91e68af1776dac9d7008a90cb (diff) | |
download | markdown-rs-f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456.tar.gz markdown-rs-f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456.tar.bz2 markdown-rs-f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456.zip |
Refactor to work on `char`s
Previously, a custom char implementation was used.
This was easier to work with, as sometimes “virtual” characters are injected,
or characters are ignored.
This replaces that with working on actual `char`s.
In the hope of in the future working on `u8`s, even.
This simplifies the state machine somewhat, as only `\n` is fed, regardless of
whether it was a CRLF, CR, or LF.
It also feeds `' '` instead of virtual spaces.
The BOM, if present, is now available as a `ByteOrderMark` event.
Diffstat (limited to 'src/construct/label_end.rs')
-rw-r--r-- | src/construct/label_end.rs | 92 |
1 file changed, 44 insertions, 48 deletions
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 6f0a707..5ea788f 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -1,4 +1,4 @@ -//! Label end is a construct that occurs in the [text][] content type. +//! Label end is a construct that occurs in the [text][] content type. //! //! It forms with the following BNF: //! @@ -154,10 +154,11 @@ use crate::construct::{ partial_title::{start as title, Options as TitleOptions}, }; use crate::token::Token; -use crate::tokenizer::{Code, Event, EventType, Media, State, Tokenizer}; +use crate::tokenizer::{Event, EventType, Media, State, Tokenizer}; use crate::util::{ normalize_identifier::normalize_identifier, - span::{serialize, Span}, + skip, + slice::{Position, Slice}, }; /// State needed to parse label end. @@ -181,7 +182,7 @@ struct Info { /// > | [a] b /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - if Code::Char(']') == tokenizer.current && tokenizer.parse_state.constructs.label_end { + if Some(']') == tokenizer.current && tokenizer.parse_state.constructs.label_end { let mut label_start_index = None; let mut index = tokenizer.label_start_stack.len(); @@ -207,19 +208,23 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { } let label_end_start = tokenizer.events.len(); + let info = Info { label_start_index, media: Media { start: label_start.start, end: (label_end_start, label_end_start + 3), - id: normalize_identifier(&serialize( - &tokenizer.parse_state.codes, - &Span { - start_index: tokenizer.events[label_start.start.1].point.index, - end_index: tokenizer.events[label_end_start - 1].point.index, - }, - false, - )), + // To do: virtual spaces not needed, create a `to_str`? 
+ id: normalize_identifier( + &Slice::from_position( + &tokenizer.parse_state.chars, + &Position { + start: &tokenizer.events[label_start.start.1].point, + end: &tokenizer.events[label_end_start - 1].point, + }, + ) + .serialize(), + ), }, }; @@ -253,7 +258,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State { match tokenizer.current { // Resource (`[asd](fgh)`)? - Code::Char('(') => tokenizer.attempt(resource, move |is_ok| { + Some('(') => tokenizer.attempt(resource, move |is_ok| { Box::new(move |t| { // Also fine if `defined`, as then it’s a valid shortcut. if is_ok || defined { @@ -264,7 +269,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State { }) })(tokenizer), // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference? - Code::Char('[') => tokenizer.attempt(full_reference, move |is_ok| { + Some('[') => tokenizer.attempt(full_reference, move |is_ok| { Box::new(move |t| { if is_ok { ok(t, info) @@ -377,7 +382,7 @@ fn nok(tokenizer: &mut Tokenizer, label_start_index: usize) -> State { /// ``` fn resource(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::Char('(') => { + Some('(') => { tokenizer.enter(Token::Resource); tokenizer.enter(Token::ResourceMarker); tokenizer.consume(); @@ -406,7 +411,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State { /// ``` fn resource_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::Char(')') => resource_end(tokenizer), + Some(')') => resource_end(tokenizer), _ => tokenizer.go( |t| { destination( @@ -446,7 +451,7 @@ fn destination_after(tokenizer: &mut Tokenizer) -> State { /// ``` fn resource_between(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::Char('"' | '\'' | '(') => tokenizer.go( + Some('"' | '\'' | '(') => tokenizer.go( |t| { title( t, @@ -481,7 +486,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State { /// ``` fn resource_end(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::Char(')') => { + 
Some(')') => { tokenizer.enter(Token::ResourceMarker); tokenizer.consume(); tokenizer.exit(Token::ResourceMarker); @@ -500,7 +505,7 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State { /// ``` fn full_reference(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::Char('[') => tokenizer.go( + Some('[') => tokenizer.go( |t| { label( t, @@ -524,36 +529,23 @@ fn full_reference(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` fn full_reference_after(tokenizer: &mut Tokenizer) -> State { - let events = &tokenizer.events; - let mut index = events.len() - 1; - let mut start: Option<usize> = None; - let mut end: Option<usize> = None; - - while index > 0 { - index -= 1; - let event = &events[index]; - if event.token_type == Token::ReferenceString { - if event.event_type == EventType::Exit { - end = Some(event.point.index); - } else { - start = Some(event.point.index); - break; - } - } - } + let end = skip::to_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Token::ReferenceString], + ); + + // To do: virtual spaces not needed, create a `to_str`? + let id = Slice::from_position( + &tokenizer.parse_state.chars, + &Position::from_exit_event(&tokenizer.events, end), + ) + .serialize(); if tokenizer .parse_state .definitions - .contains(&normalize_identifier(&serialize( - &tokenizer.parse_state.codes, - &Span { - // Always found, otherwise we don’t get here. 
- start_index: start.unwrap(), - end_index: end.unwrap(), - }, - false, - ))) + .contains(&normalize_identifier(&id)) { State::Ok } else { @@ -571,7 +563,7 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State { /// ``` fn collapsed_reference(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::Char('[') => { + Some('[') => { tokenizer.enter(Token::Reference); tokenizer.enter(Token::ReferenceMarker); tokenizer.consume(); @@ -592,7 +584,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State { /// ``` fn collapsed_reference_open(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Code::Char(']') => { + Some(']') => { tokenizer.enter(Token::ReferenceMarker); tokenizer.consume(); tokenizer.exit(Token::ReferenceMarker); @@ -735,7 +727,11 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) { 0, vec![Event { event_type: EventType::Exit, - token_type: Token::Link, + token_type: if group_enter_event.token_type == Token::LabelLink { + Token::Link + } else { + Token::Image + }, point: events[group_end_index].point.clone(), link: None, }], |