1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
//! The text content type.
//!
//! **Text** contains phrasing content such as
//! [attention][crate::construct::attention] (emphasis, strong),
//! [code (text)][crate::construct::code_text], and actual text.
//!
//! The constructs found in text are:
//!
//! * [Attention][crate::construct::attention]
//! * [Autolink][crate::construct::autolink]
//! * [Character escape][crate::construct::character_escape]
//! * [Character reference][crate::construct::character_reference]
//! * [Code (text)][crate::construct::code_text]
//! * [Hard break (escape)][crate::construct::hard_break_escape]
//! * [HTML (text)][crate::construct::html_text]
//! * [Label start (image)][crate::construct::label_start_image]
//! * [Label start (link)][crate::construct::label_start_link]
//! * [Label end][crate::construct::label_end]
//!
//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
//! > [whitespace][crate::construct::partial_whitespace].
use crate::construct::partial_whitespace::resolve_whitespace;
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
/// Bytes that can start (or end) a construct in text.
///
/// `start` installs this table as `tokenizer.tokenize_state.markers`.
/// The bytes are listed in ascending order:
///
/// | Byte | Construct(s)                            |
/// | ---- | --------------------------------------- |
/// | `!`  | `label_start_image`                     |
/// | `&`  | `character_reference`                   |
/// | `*`  | `attention`                             |
/// | `<`  | `autolink`, `html_text`                 |
/// | `[`  | `label_start_link`                      |
/// | `\`  | `character_escape`, `hard_break_escape` |
/// | `]`  | `label_end`                             |
/// | `_`  | `attention`                             |
/// | `` ` `` | `code_text`                          |
const MARKERS: [u8; 9] = *b"!&*<[\\]_`";
/// Start of text.
///
/// Registers the `Text` resolver on the tokenizer, installs `MARKERS` as the
/// tokenizer’s marker set, and returns `State::Retry` targeting
/// `StateName::TextBefore`.
pub fn start(tokenizer: &mut Tokenizer) -> State {
    tokenizer.register_resolver(ResolveName::Text);
    tokenizer.tokenize_state.markers = &MARKERS;

    let entry = StateName::TextBefore;
    State::Retry(entry)
}
/// Before text.
pub fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Ok,
Some(b'!') => tokenizer.attempt(
StateName::LabelStartImageStart,
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeData),
),
Some(b'&') => tokenizer.attempt(
StateName::CharacterReferenceStart,
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeData),
),
Some(b'*' | b'_') => tokenizer.attempt(
StateName::AttentionStart,
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeData),
),
// `autolink`, `html_text` (order does not matter)
Some(b'<') => tokenizer.attempt(
StateName::AutolinkStart,
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeHtml),
),
Some(b'[') => tokenizer.attempt(
StateName::LabelStartLinkStart,
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeData),
),
Some(b'\\') => tokenizer.attempt(
StateName::CharacterEscapeStart,
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeHardBreakEscape),
),
Some(b']') => tokenizer.attempt(
StateName::LabelEndStart,
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeData),
),
Some(b'`') => tokenizer.attempt(
StateName::CodeTextStart,
State::Next(StateName::TextBefore),
State::Next(StateName::TextBeforeData),
),
_ => State::Retry(StateName::TextBeforeData),
}
}
/// At `<`, which wasn’t an autolink: before HTML?
///
/// Attempts HTML (text); afterwards tokenizing continues at `TextBefore`, or
/// at `TextBeforeData` when the attempt does not work out.
pub fn before_html(tokenizer: &mut Tokenizer) -> State {
    let on_success = State::Next(StateName::TextBefore);
    let on_failure = State::Next(StateName::TextBeforeData);

    tokenizer.attempt(StateName::HtmlTextStart, on_success, on_failure)
}
/// At `\`, which wasn’t a character escape: before a hard break?
///
/// Attempts a hard break (escape); afterwards tokenizing continues at
/// `TextBefore`, or at `TextBeforeData` when the attempt does not work out.
pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
    let on_success = State::Next(StateName::TextBefore);
    let on_failure = State::Next(StateName::TextBeforeData);

    tokenizer.attempt(StateName::HardBreakEscapeStart, on_success, on_failure)
}
/// At data.
///
/// Attempts the data construct; afterwards tokenizing continues at
/// `TextBefore`. Unlike the other text states there is no further fallback:
/// the last argument handed to `attempt` is `State::Nok`.
pub fn before_data(tokenizer: &mut Tokenizer) -> State {
    let on_success = State::Next(StateName::TextBefore);

    tokenizer.attempt(StateName::DataStart, on_success, State::Nok)
}
/// Resolve whitespace.
///
/// Delegates to `resolve_whitespace`, handing it the `hard_break_trailing`
/// setting read from the parse state’s constructs, plus a final `true` flag
/// (its meaning is defined by `resolve_whitespace`).
pub fn resolve(tokenizer: &mut Tokenizer) {
    // Read the setting up front so the call below only needs the tokenizer.
    let hard_break_trailing = tokenizer.parse_state.constructs.hard_break_trailing;

    resolve_whitespace(tokenizer, hard_break_trailing, true);
}
|