aboutsummaryrefslogtreecommitdiffstats
path: root/src/content/text.rs
blob: ff8c9eb71aa1b4cf92ea42c664738a72458183eb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
//! The text content type.
//!
//! **Text** contains phrasing content such as
//! [attention][crate::construct::attention] (emphasis, strong),
//! [code (text)][crate::construct::code_text], and actual text.
//!
//! The constructs found in text are:
//!
//! *   [Attention][crate::construct::attention]
//! *   [Autolink][crate::construct::autolink]
//! *   [Character escape][crate::construct::character_escape]
//! *   [Character reference][crate::construct::character_reference]
//! *   [Code (text)][crate::construct::code_text]
//! *   [Hard break (escape)][crate::construct::hard_break_escape]
//! *   [HTML (text)][crate::construct::html_text]
//! *   [Label start (image)][crate::construct::label_start_image]
//! *   [Label start (link)][crate::construct::label_start_link]
//! *   [Label end][crate::construct::label_end]
//!
//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
//! > [whitespace][crate::construct::partial_whitespace].

use crate::construct::partial_whitespace::resolve_whitespace;
use crate::tokenizer::{State, StateName, Tokenizer};

/// Bytes at which a text construct can begin; installed by `start` as the
/// tokenizer's stop bytes.
///
/// In order, with the construct(s) attempted at each byte:
///
/// | Byte | Construct(s)                            |
/// | ---- | --------------------------------------- |
/// | `!`  | `label_start_image`                     |
/// | `&`  | `character_reference`                   |
/// | `*`  | `attention`                             |
/// | `<`  | `autolink`, `html_text`                 |
/// | `[`  | `label_start_link`                      |
/// | `\`  | `character_escape`, `hard_break_escape` |
/// | `]`  | `label_end`                             |
/// | `_`  | `attention`                             |
/// | `` ` `` | `code_text`                          |
const MARKERS: [u8; 9] = *b"!&*<[\\]_`";

/// Start of text.
///
/// Sets the tokenizer up for the text content type and then hands over to
/// `before`:
///
/// 1. registers `resolve` as the resolver named `"whitespace"`;
/// 2. points `tokenize_state.stop` at `MARKERS`.
pub fn start(tokenizer: &mut Tokenizer) -> State {
    let resolver_name = String::from("whitespace");
    tokenizer.register_resolver(resolver_name, Box::new(resolve));
    tokenizer.tokenize_state.stop = &MARKERS;
    before(tokenizer)
}

/// Before text.
pub fn before(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        None => State::Ok,
        Some(b'!') => tokenizer.attempt(
            StateName::LabelStartImageStart,
            State::Fn(StateName::TextBefore),
            State::Fn(StateName::TextBeforeData),
        ),
        Some(b'&') => tokenizer.attempt(
            StateName::CharacterReferenceStart,
            State::Fn(StateName::TextBefore),
            State::Fn(StateName::TextBeforeData),
        ),
        Some(b'*' | b'_') => tokenizer.attempt(
            StateName::AttentionStart,
            State::Fn(StateName::TextBefore),
            State::Fn(StateName::TextBeforeData),
        ),
        // `autolink`, `html_text` (order does not matter)
        Some(b'<') => tokenizer.attempt(
            StateName::AutolinkStart,
            State::Fn(StateName::TextBefore),
            State::Fn(StateName::TextBeforeHtml),
        ),
        Some(b'[') => tokenizer.attempt(
            StateName::LabelStartLinkStart,
            State::Fn(StateName::TextBefore),
            State::Fn(StateName::TextBeforeData),
        ),
        Some(b'\\') => tokenizer.attempt(
            StateName::CharacterEscapeStart,
            State::Fn(StateName::TextBefore),
            State::Fn(StateName::TextBeforeHardBreakEscape),
        ),
        Some(b']') => tokenizer.attempt(
            StateName::LabelEndStart,
            State::Fn(StateName::TextBefore),
            State::Fn(StateName::TextBeforeData),
        ),
        Some(b'`') => tokenizer.attempt(
            StateName::CodeTextStart,
            State::Fn(StateName::TextBefore),
            State::Fn(StateName::TextBeforeData),
        ),
        _ => before_data(tokenizer),
    }
}

/// Before HTML (text).
///
/// Attempts `HtmlTextStart`, passing `TextBefore` and `TextBeforeData` as the
/// two continuation states (presumably "on success" and "on failure", in
/// that order — confirm against `Tokenizer::attempt`).
///
/// NOTE(review): presumably reached through `StateName::TextBeforeHtml`, i.e.
/// when the autolink attempt at `<` did not succeed — confirm against the
/// state-name dispatch table.
pub fn before_html(tokenizer: &mut Tokenizer) -> State {
    let next = State::Fn(StateName::TextBefore);
    let fallback = State::Fn(StateName::TextBeforeData);
    tokenizer.attempt(StateName::HtmlTextStart, next, fallback)
}

/// Before hard break (escape).
///
/// Attempts `HardBreakEscapeStart`, passing `TextBefore` and `TextBeforeData`
/// as the two continuation states (presumably "on success" and "on failure",
/// in that order — confirm against `Tokenizer::attempt`).
///
/// NOTE(review): presumably reached through
/// `StateName::TextBeforeHardBreakEscape`, i.e. when the character escape
/// attempt at `\` did not succeed — confirm against the state-name dispatch
/// table.
pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
    let next = State::Fn(StateName::TextBefore);
    let fallback = State::Fn(StateName::TextBeforeData);
    tokenizer.attempt(StateName::HardBreakEscapeStart, next, fallback)
}

/// Before data.
///
/// ```markdown
/// |qwe
/// ```
///
/// Attempts `DataStart` with `TextBefore` as the first continuation state.
/// Unlike the other attempts in this module, the last argument is
/// `State::Nok` rather than another state to try.
pub fn before_data(tokenizer: &mut Tokenizer) -> State {
    let next = State::Fn(StateName::TextBefore);
    tokenizer.attempt(StateName::DataStart, next, State::Nok)
}

/// Resolve whitespace.
///
/// Delegates to `resolve_whitespace`, forwarding the
/// `constructs.hard_break_trailing` setting from the parse state and `true`
/// as the final flag (its meaning is defined by `resolve_whitespace`).
pub fn resolve(tokenizer: &mut Tokenizer) {
    // Read the setting first so the call below only needs the tokenizer itself.
    let hard_break_trailing = tokenizer.parse_state.constructs.hard_break_trailing;
    resolve_whitespace(tokenizer, hard_break_trailing, true);
}