//! Attention: tokenizes runs of `*` and `_` markers and resolves matching
//! runs into emphasis and strong.
use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
use crate::util::edit_map::EditMap;
/// Kind of character found directly before or after a run of attention
/// markers.
///
/// Produced by `classify_character`; `resolve` compares the kinds on both
/// sides of a run to decide whether that run can open and/or close attention.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum GroupKind {
    /// Whitespace; the absence of a character (start or end of the codes) is
    /// classified as whitespace too.
    Whitespace,
    /// Punctuation (currently only ASCII punctuation is recognized).
    Punctuation,
    /// Any other character.
    Other,
}
/// The character an attention run is made of.
///
/// Only runs with the same marker can be matched with each other.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum MarkerKind {
    /// `*`
    Asterisk,
    /// `_`
    Underscore,
}
impl MarkerKind {
    /// Turn a marker character (`*` or `_`) into its kind.
    ///
    /// Panics on any other character: callers must only pass characters that
    /// were tokenized as attention markers.
    fn from_char(char: char) -> MarkerKind {
        if char == '*' {
            Self::Asterisk
        } else if char == '_' {
            Self::Underscore
        } else {
            unreachable!("invalid char")
        }
    }

    /// Turn a code into a marker kind.
    ///
    /// Panics when the code is not a character, or when the character is not
    /// a marker.
    fn from_code(code: Code) -> MarkerKind {
        if let Code::Char(char) = code {
            Self::from_char(char)
        } else {
            unreachable!("invalid code")
        }
    }
}
/// A run of attention markers: one `AttentionSequence` collected by `resolve`.
///
/// `resolve` shrinks a run (its `size` and one of its points) every time
/// markers are taken from it to form emphasis or strong.
#[derive(Debug)]
struct Run {
/// Marker the run is made of.
marker: MarkerKind,
/// Index of the run's enter event in the tokenizer's events; `resolve` reads
/// the matching exit event at the next index.
event_index: usize,
/// Point of the enter event; moved forward when markers are taken from the
/// start of the run (when the run closes).
start_point: Point,
/// Index (into the codes) of the enter event.
start_index: usize,
/// Point of the exit event; moved backward when markers are taken from the
/// end of the run (when the run opens).
end_point: Point,
/// Index (into the codes) of the exit event.
end_index: usize,
/// Number of markers left in the run (initially exit index minus enter
/// index).
size: usize,
/// Whether the run can open attention.
open: bool,
/// Whether the run can close attention.
close: bool,
}
/// Before a run of attention markers.
///
/// Accepts only `*` and `_`; that character becomes the marker the whole
/// sequence must consist of.
///
/// ```markdown
/// |**alpha**
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
    if let Code::Char(marker @ ('*' | '_')) = code {
        tokenizer.enter(TokenType::AttentionSequence);
        return inside(tokenizer, code, marker);
    }

    (State::Nok, None)
}
/// In a run of attention markers.
///
/// Consumes codes for as long as they equal `marker`; on the first other
/// code the sequence is exited, the attention resolver is registered, and
/// that code is handed back unconsumed.
///
/// ```markdown
/// *|*alpha**
/// ```
fn inside(tokenizer: &mut Tokenizer, code: Code, marker: char) -> StateFnResult {
    if matches!(code, Code::Char(char) if char == marker) {
        tokenizer.consume(code);
        return (State::Fn(Box::new(move |t, c| inside(t, c, marker))), None);
    }

    tokenizer.exit(TokenType::AttentionSequence);
    tokenizer.register_resolver(String::from("attention"), Box::new(resolve));
    (State::Ok, Some(vec![code]))
}
/// To do.
#[allow(clippy::too_many_lines)]
pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
let mut index = 0;
println!("before: {:?}", tokenizer.events.len());
while index < tokenizer.events.len() {
let event = &tokenizer.events[index];
println!(
"ev: {:?} {:?} {:?} {:?} {:?} {:?}",
index,
event.event_type,
event.token_type,
event.content_type,
event.previous,
event.next
);
index += 1;
}
let codes = &tokenizer.parse_state.codes;
let mut edit_map = EditMap::new();
let mut start = 0;
let mut runs: Vec<Run> = vec![];
// Find runs of sequences and information about them.
while start < tokenizer.events.len() {
let enter = &tokenizer.events[start];
if enter.event_type == EventType::Enter && enter.token_type == TokenType::AttentionSequence
{
let end = start + 1;
let exit = &tokenizer.events[end];
let marker = MarkerKind::from_code(codes[enter.index]);
let before = classify_character(if enter.index > 0 {
codes[enter.index - 1]
} else {
Code::None
});
let after = classify_character(if exit.index < codes.len() {
codes[exit.index]
} else {
Code::None
});
let open = after == GroupKind::Other
|| (after == GroupKind::Punctuation && before != GroupKind::Other);
// To do: GFM strikethrough?
// || attentionMarkers.includes(code)
let close = before == GroupKind::Other
|| (before == GroupKind::Punctuation && after != GroupKind::Other);
// To do: GFM strikethrough?
// || attentionMarkers.includes(previous)
runs.push(Run {
event_index: start,
start_point: enter.point.clone(),
start_index: enter.index,
end_point: exit.point.clone(),
end_index: exit.index,
size: exit.index - enter.index,
open: if marker == MarkerKind::Asterisk {
open
} else {
open && (before != GroupKind::Other || !close)
},
close: if marker == MarkerKind::Asterisk {
close
} else {
close && (after != GroupKind::Other || !open)
},
marker,
});
start += 1;
}
start += 1;
}
// Walk through runs and match them.
let mut close = 0;
while close < runs.len() {
let run_close = &runs[close];
// Find a run that can close.
if run_close.close {
let mut open = close;
// Now walk back to find an opener.
while open > 0 {
open -= 1;
let run_open = &runs[open];
// Find a token that can open the closer.
if run_open.open && run_close.marker == run_open.marker {
// If the opening can close or the closing can open,
// and the close size *is not* a multiple of three,
// but the sum of the opening and closing size *is*
// multiple of three, then **don’t** match.
if (run_open.close || run_close.open)
&& run_close.size % 3 != 0
&& (run_open.size + run_close.size) % 3 == 0
{
continue;
}
// Number of markers to use from the sequence.
let take = if run_open.size > 1 && run_close.size > 1 {
2
} else {
1
};
let run_close = &mut runs[close];
let close_event_index = run_close.event_index;
let seq_close_enter = (run_close.start_point.clone(), run_close.start_index);
run_close.size -= take;
run_close.start_point.column += take;
run_close.start_point.offset += take;
let seq_close_exit = (run_close.start_point.clone(), run_close.start_index);
// Remove closing run if fully used.
if run_close.size == 0 {
runs.remove(close);
edit_map.add(close_event_index, 2, vec![]);
}
let run_open = &mut runs[open];
let open_event_index = run_open.event_index;
let seq_open_exit = (run_open.end_point.clone(), run_open.end_index);
run_open.size -= take;
run_open.end_point.column -= take;
run_open.end_point.offset -= take;
let seq_open_enter = (run_open.end_point.clone(), run_open.end_index);
// Remove opening run if fully used.
if run_open.size == 0 {
runs.remove(open);
edit_map.add(open_event_index, 2, vec![]);
}
// Opening.
edit_map.add(
open_event_index,
0,
vec![
Event {
event_type: EventType::Enter,
token_type: if take == 1 {
TokenType::Emphasis
} else {
TokenType::Strong
},
point: seq_open_enter.0.clone(),
index: seq_open_enter.1,
previous: None,
next: None,
content_type: None,
},
Event {
event_type: EventType::Enter,
token_type: if take == 1 {
TokenType::EmphasisSequence
} else {
TokenType::StrongSequence
},
point: seq_open_enter.0.clone(),
index: seq_open_enter.1,
previous: None,
next: None,
content_type: None,
},
Event {
event_type: EventType::Exit,
token_type: if take == 1 {
TokenType::EmphasisSequence
} else {
TokenType::StrongSequence
},
point: seq_open_exit.0.clone(),
index: seq_open_exit.1,
previous: None,
next: None,
content_type: None,
},
Event {
event_type: EventType::Enter,
token_type: if take == 1 {
TokenType::EmphasisText
} else {
TokenType::StrongText
},
point: seq_open_exit.0.clone(),
index: seq_open_exit.1,
previous: None,
next: None,
content_type: None,
},
],
);
// Closing.
edit_map.add(
close_event_index,
0,
vec![
Event {
event_type: EventType::Exit,
token_type: if take == 1 {
TokenType::EmphasisText
} else {
TokenType::StrongText
},
point: seq_close_enter.0.clone(),
index: seq_close_enter.1,
previous: None,
next: None,
content_type: None,
},
Event {
event_type: EventType::Enter,
token_type: if take == 1 {
TokenType::EmphasisSequence
} else {
TokenType::StrongSequence
},
point: seq_close_enter.0.clone(),
index: seq_close_enter.1,
previous: None,
next: None,
content_type: None,
},
Event {
event_type: EventType::Exit,
token_type: if take == 1 {
TokenType::EmphasisSequence
} else {
TokenType::StrongSequence
},
point: seq_close_exit.0.clone(),
index: seq_close_exit.1,
previous: None,
next: None,
content_type: None,
},
Event {
event_type: EventType::Exit,
token_type: if take == 1 {
TokenType::Emphasis
} else {
TokenType::Strong
},
point: seq_close_exit.0.clone(),
index: seq_close_exit.1,
previous: None,
next: None,
content_type: None,
},
],
);
break;
}
}
}
close += 1;
}
// Mark remaining sequences as data.
let mut index = 0;
while index < runs.len() {
let run = &runs[index];
// To do: resize!
tokenizer.events[run.event_index].token_type = TokenType::Data;
tokenizer.events[run.event_index + 1].token_type = TokenType::Data;
index += 1;
}
let events = edit_map.consume(&mut tokenizer.events);
let mut index = 0;
println!("after: {:?}", events.len());
while index < events.len() {
let event = &events[index];
println!(
"ev: {:?} {:?} {:?} {:?} {:?} {:?}",
index,
event.event_type,
event.token_type,
event.content_type,
event.previous,
event.next
);
index += 1;
}
events
}
/// Classify a code as whitespace, punctuation, or anything else.
///
/// Every code that is not a character (including the absence of a code)
/// counts as whitespace.
fn classify_character(code: Code) -> GroupKind {
    match code {
        Code::Char(char) => {
            // Markdown whitespace (tab, carriage return, line feed, space) is
            // a subset of Unicode whitespace, so one check covers both.
            if char.is_whitespace() {
                GroupKind::Whitespace
            }
            // Unicode punctuation.
            // To do: `is_punctuation` is not in rust? Why not?
            // Perhaps we need to generate stuff just like:
            // <https://github.com/micromark/micromark/blob/main/packages/micromark-util-character/dev/lib/unicode-punctuation-regex.js>.
            else if char.is_ascii_punctuation() {
                GroupKind::Punctuation
            } else {
                GroupKind::Other
            }
        }
        Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace => GroupKind::Whitespace,
    }
}