aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/label_end.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-28 16:48:00 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-28 16:48:00 +0200
commitf7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456 (patch)
treec1ac3f22473bd79566d835b2474d2ae9e00d6c55 /src/construct/label_end.rs
parentd729b07712ca9cc91e68af1776dac9d7008a90cb (diff)
downloadmarkdown-rs-f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456.tar.gz
markdown-rs-f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456.tar.bz2
markdown-rs-f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456.zip
Refactor to work on `char`s
Previously, a custom char implementation was used. This was easier to work with, as sometimes “virtual” characters are injected, or characters are ignored. This replaces that with working on actual `char`s. In the hope of in the future working on `u8`s, even. This simplifies the state machine somewhat, as only `\n` is fed, regardless of whether it was a CRLF, CR, or LF. It also feeds `' '` instead of virtual spaces. The BOM, if present, is now available as a `ByteOrderMark` event.
Diffstat (limited to 'src/construct/label_end.rs')
-rw-r--r--src/construct/label_end.rs92
1 files changed, 44 insertions, 48 deletions
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 6f0a707..5ea788f 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -1,4 +1,4 @@
-//! Label end is a construct that occurs in the [text][] content type.
+//! Label end is a construct that occurs in the [text][] conten&t type.
//!
//! It forms with the following BNF:
//!
@@ -154,10 +154,11 @@ use crate::construct::{
partial_title::{start as title, Options as TitleOptions},
};
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, Media, State, Tokenizer};
+use crate::tokenizer::{Event, EventType, Media, State, Tokenizer};
use crate::util::{
normalize_identifier::normalize_identifier,
- span::{serialize, Span},
+ skip,
+ slice::{Position, Slice},
};
/// State needed to parse label end.
@@ -181,7 +182,7 @@ struct Info {
/// > | [a] b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Code::Char(']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {
+ if Some(']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {
let mut label_start_index = None;
let mut index = tokenizer.label_start_stack.len();
@@ -207,19 +208,23 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
let label_end_start = tokenizer.events.len();
+
let info = Info {
label_start_index,
media: Media {
start: label_start.start,
end: (label_end_start, label_end_start + 3),
- id: normalize_identifier(&serialize(
- &tokenizer.parse_state.codes,
- &Span {
- start_index: tokenizer.events[label_start.start.1].point.index,
- end_index: tokenizer.events[label_end_start - 1].point.index,
- },
- false,
- )),
+ // To do: virtual spaces not needed, create a `to_str`?
+ id: normalize_identifier(
+ &Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position {
+ start: &tokenizer.events[label_start.start.1].point,
+ end: &tokenizer.events[label_end_start - 1].point,
+ },
+ )
+ .serialize(),
+ ),
},
};
@@ -253,7 +258,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
// Resource (`[asd](fgh)`)?
- Code::Char('(') => tokenizer.attempt(resource, move |is_ok| {
+ Some('(') => tokenizer.attempt(resource, move |is_ok| {
Box::new(move |t| {
// Also fine if `defined`, as then it’s a valid shortcut.
if is_ok || defined {
@@ -264,7 +269,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
})
})(tokenizer),
// Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference?
- Code::Char('[') => tokenizer.attempt(full_reference, move |is_ok| {
+ Some('[') => tokenizer.attempt(full_reference, move |is_ok| {
Box::new(move |t| {
if is_ok {
ok(t, info)
@@ -377,7 +382,7 @@ fn nok(tokenizer: &mut Tokenizer, label_start_index: usize) -> State {
/// ```
fn resource(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('(') => {
+ Some('(') => {
tokenizer.enter(Token::Resource);
tokenizer.enter(Token::ResourceMarker);
tokenizer.consume();
@@ -406,7 +411,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(')') => resource_end(tokenizer),
+ Some(')') => resource_end(tokenizer),
_ => tokenizer.go(
|t| {
destination(
@@ -446,7 +451,7 @@ fn destination_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('"' | '\'' | '(') => tokenizer.go(
+ Some('"' | '\'' | '(') => tokenizer.go(
|t| {
title(
t,
@@ -481,7 +486,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(')') => {
+ Some(')') => {
tokenizer.enter(Token::ResourceMarker);
tokenizer.consume();
tokenizer.exit(Token::ResourceMarker);
@@ -500,7 +505,7 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State {
/// ```
fn full_reference(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('[') => tokenizer.go(
+ Some('[') => tokenizer.go(
|t| {
label(
t,
@@ -524,36 +529,23 @@ fn full_reference(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
- let events = &tokenizer.events;
- let mut index = events.len() - 1;
- let mut start: Option<usize> = None;
- let mut end: Option<usize> = None;
-
- while index > 0 {
- index -= 1;
- let event = &events[index];
- if event.token_type == Token::ReferenceString {
- if event.event_type == EventType::Exit {
- end = Some(event.point.index);
- } else {
- start = Some(event.point.index);
- break;
- }
- }
- }
+ let end = skip::to_back(
+ &tokenizer.events,
+ tokenizer.events.len() - 1,
+ &[Token::ReferenceString],
+ );
+
+ // To do: virtual spaces not needed, create a `to_str`?
+ let id = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position::from_exit_event(&tokenizer.events, end),
+ )
+ .serialize();
if tokenizer
.parse_state
.definitions
- .contains(&normalize_identifier(&serialize(
- &tokenizer.parse_state.codes,
- &Span {
- // Always found, otherwise we don’t get here.
- start_index: start.unwrap(),
- end_index: end.unwrap(),
- },
- false,
- )))
+ .contains(&normalize_identifier(&id))
{
State::Ok
} else {
@@ -571,7 +563,7 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('[') => {
+ Some('[') => {
tokenizer.enter(Token::Reference);
tokenizer.enter(Token::ReferenceMarker);
tokenizer.consume();
@@ -592,7 +584,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
/// ```
fn collapsed_reference_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(']') => {
+ Some(']') => {
tokenizer.enter(Token::ReferenceMarker);
tokenizer.consume();
tokenizer.exit(Token::ReferenceMarker);
@@ -735,7 +727,11 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
0,
vec![Event {
event_type: EventType::Exit,
- token_type: Token::Link,
+ token_type: if group_enter_event.token_type == Token::LabelLink {
+ Token::Link
+ } else {
+ Token::Image
+ },
point: events[group_end_index].point.clone(),
link: None,
}],