diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-15 11:40:40 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-15 11:40:40 +0200 |
commit | ee967aa634b5f8e9d30329d587538f1371a5da95 (patch) | |
tree | cdc1461c822e440b24428eb8d431881e216ab8bd /src/construct/string.rs | |
parent | 13135666fac476f3cd6f059147f496533b304097 (diff) | |
download | markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.gz markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.tar.bz2 markdown-rs-ee967aa634b5f8e9d30329d587538f1371a5da95.zip |
Refactor to move `content` to `construct`
Diffstat (limited to 'src/construct/string.rs')
-rw-r--r-- | src/construct/string.rs | 76 |
1 files changed, 76 insertions, 0 deletions
diff --git a/src/construct/string.rs b/src/construct/string.rs new file mode 100644 index 0000000..698a51d --- /dev/null +++ b/src/construct/string.rs @@ -0,0 +1,76 @@ +//! The string content type. +//! +//! **String** is a limited [text][] like content type which only allows +//! character escapes and character references. +//! It exists in things such as identifiers (media references, definitions), +//! titles, URLs, code (fenced) info and meta parts. +//! +//! The constructs found in string are: +//! +//! * [Character escape][crate::construct::character_escape] +//! * [Character reference][crate::construct::character_reference] +//! +//! [text]: crate::construct::text + +use crate::construct::partial_whitespace::resolve_whitespace; +use crate::resolve::Name as ResolveName; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; + +/// Characters that can start something in string. +const MARKERS: [u8; 2] = [b'&', b'\\']; + +/// Start of string. +/// +/// ````markdown +/// > | ```js +/// ^ +/// ```` +pub fn start(tokenizer: &mut Tokenizer) -> State { + tokenizer.register_resolver(ResolveName::String); + tokenizer.tokenize_state.markers = &MARKERS; + State::Retry(StateName::StringBefore) +} + +/// Before string. +/// +/// ````markdown +/// > | ```js +/// ^ +/// ```` +pub fn before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + None => State::Ok, + Some(b'&') => { + tokenizer.attempt( + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), + ); + State::Retry(StateName::CharacterReferenceStart) + } + Some(b'\\') => { + tokenizer.attempt( + State::Next(StateName::StringBefore), + State::Next(StateName::StringBeforeData), + ); + State::Retry(StateName::CharacterEscapeStart) + } + _ => State::Retry(StateName::StringBeforeData), + } +} + +/// At data. +/// +/// ````markdown +/// > | ```js +/// ^ +/// ```` +pub fn before_data(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt(State::Next(StateName::StringBefore), State::Nok); + State::Retry(StateName::DataStart) +} + +/// Resolve whitespace in string. +pub fn resolve(tokenizer: &mut Tokenizer) { + resolve_whitespace(tokenizer, false, false); +} |