//! Attention: sequences of `*` or `_`, which are resolved into emphasis and
//! strong.

use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
use crate::util::edit_map::EditMap;

/// Kind of the character before or after a sequence, used to decide whether
/// the sequence can open and/or close.
#[derive(Debug, PartialEq)]
enum GroupKind {
    Whitespace,
    Punctuation,
    Other,
}

/// Marker of an attention sequence: `*` or `_`.
#[derive(Debug, PartialEq)]
enum MarkerKind {
    Asterisk,
    Underscore,
}

impl MarkerKind {
    /// Turn a [char] into a marker.
    fn from_char(char: char) -> MarkerKind {
        match char {
            '*' => MarkerKind::Asterisk,
            '_' => MarkerKind::Underscore,
            _ => unreachable!("invalid char"),
        }
    }

    /// Turn a [Code] into a marker.
    fn from_code(code: Code) -> MarkerKind {
        match code {
            Code::Char(char) => MarkerKind::from_char(char),
            _ => unreachable!("invalid code"),
        }
    }
}

/// A run (sequence) of attention markers, with the info needed to match it
/// against other runs.
#[derive(Debug)]
struct Run {
    marker: MarkerKind,
    event_index: usize,
    start_point: Point,
    start_index: usize,
    end_point: Point,
    end_index: usize,
    size: usize,
    open: bool,
    close: bool,
}

/// Before a sequence.
///
/// ```markdown
/// |**
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
    match code {
        Code::Char(char) if char == '*' || char == '_' => {
            tokenizer.enter(TokenType::AttentionSequence);
            inside(tokenizer, code, char)
        }
        _ => (State::Nok, None),
    }
}

/// In a sequence.
///
/// ```markdown
/// *|*
/// ```
fn inside(tokenizer: &mut Tokenizer, code: Code, marker: char) -> StateFnResult {
    match code {
        Code::Char(char) if char == marker => {
            tokenizer.consume(code);
            (State::Fn(Box::new(move |t, c| inside(t, c, marker))), None)
        }
        _ => {
            tokenizer.exit(TokenType::AttentionSequence);
            tokenizer.register_resolver("attention".to_string(), Box::new(resolve));
            (State::Ok, Some(vec![code]))
        }
    }
}

/// Resolve attention sequences: match opening and closing runs into emphasis
/// and strong, and turn whatever remains into data.
#[allow(clippy::too_many_lines)]
pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
    let mut index = 0;
    println!("before: {:?}", tokenizer.events.len());
    while index < tokenizer.events.len() {
        let event = &tokenizer.events[index];
        println!(
            "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
            index,
            event.event_type,
            event.token_type,
            event.content_type,
            event.previous,
            event.next
        );
        index += 1;
    }

    let codes = &tokenizer.parse_state.codes;
    let mut edit_map = EditMap::new();
    let mut start = 0;
    let mut runs: Vec<Run> = vec![];

    // Find runs of sequences and information about them.
    while start < tokenizer.events.len() {
        let enter = &tokenizer.events[start];

        if enter.event_type == EventType::Enter
            && enter.token_type == TokenType::AttentionSequence
        {
            let end = start + 1;
            let exit = &tokenizer.events[end];
            let marker = MarkerKind::from_code(codes[enter.index]);
            let before = classify_character(if enter.index > 0 {
                codes[enter.index - 1]
            } else {
                Code::None
            });
            let after = classify_character(if exit.index < codes.len() {
                codes[exit.index]
            } else {
                Code::None
            });
            let open = after == GroupKind::Other
                || (after == GroupKind::Punctuation && before != GroupKind::Other);
            // To do: GFM strikethrough?
            // || attentionMarkers.includes(code)
            let close = before == GroupKind::Other
                || (before == GroupKind::Punctuation && after != GroupKind::Other);
            // To do: GFM strikethrough?
            // || attentionMarkers.includes(previous)

            runs.push(Run {
                event_index: start,
                start_point: enter.point.clone(),
                start_index: enter.index,
                end_point: exit.point.clone(),
                end_index: exit.index,
                size: exit.index - enter.index,
                open: if marker == MarkerKind::Asterisk {
                    open
                } else {
                    open && (before != GroupKind::Other || !close)
                },
                close: if marker == MarkerKind::Asterisk {
                    close
                } else {
                    close && (after != GroupKind::Other || !open)
                },
                marker,
            });

            start += 1;
        }

        start += 1;
    }

    // Walk through runs and match them.
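    // A small worked example (hedged: it assumes the CommonMark attention
    // semantics this mirrors, not verified output). In `*foo* bar*`, the
    // first `*` is preceded by nothing and followed by a letter, so it can
    // only open; the other two `*` are preceded by a letter and followed by
    // whitespace or nothing, so they can only close. The loop below then
    // pairs the first two runs into emphasis and leaves the last run to be
    // marked as data further down.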
    let mut close = 0;
    while close < runs.len() {
        let run_close = &runs[close];

        // Find a run that can close.
        if run_close.close {
            let mut open = close;

            // Now walk back to find an opener.
            while open > 0 {
                open -= 1;
                let run_open = &runs[open];

                // Find a token that can open the closer.
                if run_open.open && run_close.marker == run_open.marker {
                    // If the opening can close or the closing can open,
                    // and the close size *is not* a multiple of three,
                    // but the sum of the opening and closing size *is* a
                    // multiple of three, then **don’t** match.
                    if (run_open.close || run_close.open)
                        && run_close.size % 3 != 0
                        && (run_open.size + run_close.size) % 3 == 0
                    {
                        continue;
                    }

                    // Number of markers to use from the sequence.
                    let take = if run_open.size > 1 && run_close.size > 1 {
                        2
                    } else {
                        1
                    };

                    let run_close = &mut runs[close];
                    let close_event_index = run_close.event_index;
                    let seq_close_enter = (run_close.start_point.clone(), run_close.start_index);
                    run_close.size -= take;
                    run_close.start_point.column += take;
                    run_close.start_point.offset += take;
                    let seq_close_exit = (run_close.start_point.clone(), run_close.start_index);

                    // Remove closing run if fully used.
                    if run_close.size == 0 {
                        runs.remove(close);
                        edit_map.add(close_event_index, 2, vec![]);
                    }

                    let run_open = &mut runs[open];
                    let open_event_index = run_open.event_index;
                    let seq_open_exit = (run_open.end_point.clone(), run_open.end_index);
                    run_open.size -= take;
                    run_open.end_point.column -= take;
                    run_open.end_point.offset -= take;
                    let seq_open_enter = (run_open.end_point.clone(), run_open.end_index);

                    // Remove opening run if fully used.
                    if run_open.size == 0 {
                        runs.remove(open);
                        edit_map.add(open_event_index, 2, vec![]);
                    }

                    // Opening.
                    edit_map.add(
                        open_event_index,
                        0,
                        vec![
                            Event {
                                event_type: EventType::Enter,
                                token_type: if take == 1 {
                                    TokenType::Emphasis
                                } else {
                                    TokenType::Strong
                                },
                                point: seq_open_enter.0.clone(),
                                index: seq_open_enter.1,
                                previous: None,
                                next: None,
                                content_type: None,
                            },
                            Event {
                                event_type: EventType::Enter,
                                token_type: if take == 1 {
                                    TokenType::EmphasisSequence
                                } else {
                                    TokenType::StrongSequence
                                },
                                point: seq_open_enter.0.clone(),
                                index: seq_open_enter.1,
                                previous: None,
                                next: None,
                                content_type: None,
                            },
                            Event {
                                event_type: EventType::Exit,
                                token_type: if take == 1 {
                                    TokenType::EmphasisSequence
                                } else {
                                    TokenType::StrongSequence
                                },
                                point: seq_open_exit.0.clone(),
                                index: seq_open_exit.1,
                                previous: None,
                                next: None,
                                content_type: None,
                            },
                            Event {
                                event_type: EventType::Enter,
                                token_type: if take == 1 {
                                    TokenType::EmphasisText
                                } else {
                                    TokenType::StrongText
                                },
                                point: seq_open_exit.0.clone(),
                                index: seq_open_exit.1,
                                previous: None,
                                next: None,
                                content_type: None,
                            },
                        ],
                    );

                    // Closing.
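                    // Mirrors the opening above: exit the emphasis/strong
                    // text, mark the closing sequence, then exit the
                    // emphasis/strong itself (four events, injected at the
                    // closing run).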
                    edit_map.add(
                        close_event_index,
                        0,
                        vec![
                            Event {
                                event_type: EventType::Exit,
                                token_type: if take == 1 {
                                    TokenType::EmphasisText
                                } else {
                                    TokenType::StrongText
                                },
                                point: seq_close_enter.0.clone(),
                                index: seq_close_enter.1,
                                previous: None,
                                next: None,
                                content_type: None,
                            },
                            Event {
                                event_type: EventType::Enter,
                                token_type: if take == 1 {
                                    TokenType::EmphasisSequence
                                } else {
                                    TokenType::StrongSequence
                                },
                                point: seq_close_enter.0.clone(),
                                index: seq_close_enter.1,
                                previous: None,
                                next: None,
                                content_type: None,
                            },
                            Event {
                                event_type: EventType::Exit,
                                token_type: if take == 1 {
                                    TokenType::EmphasisSequence
                                } else {
                                    TokenType::StrongSequence
                                },
                                point: seq_close_exit.0.clone(),
                                index: seq_close_exit.1,
                                previous: None,
                                next: None,
                                content_type: None,
                            },
                            Event {
                                event_type: EventType::Exit,
                                token_type: if take == 1 {
                                    TokenType::Emphasis
                                } else {
                                    TokenType::Strong
                                },
                                point: seq_close_exit.0.clone(),
                                index: seq_close_exit.1,
                                previous: None,
                                next: None,
                                content_type: None,
                            },
                        ],
                    );

                    break;
                }
            }
        }

        close += 1;
    }

    // Mark remaining sequences as data.
    let mut index = 0;
    while index < runs.len() {
        let run = &runs[index];
        // To do: resize!
        tokenizer.events[run.event_index].token_type = TokenType::Data;
        tokenizer.events[run.event_index + 1].token_type = TokenType::Data;
        index += 1;
    }

    let events = edit_map.consume(&mut tokenizer.events);

    let mut index = 0;
    println!("after: {:?}", events.len());
    while index < events.len() {
        let event = &events[index];
        println!(
            "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
            index,
            event.event_type,
            event.token_type,
            event.content_type,
            event.previous,
            event.next
        );
        index += 1;
    }

    events
}

/// Classify whether a character code represents whitespace, punctuation, or
/// something else.
fn classify_character(code: Code) -> GroupKind {
    match code {
        // Markdown whitespace.
        Code::None
        | Code::CarriageReturnLineFeed
        | Code::VirtualSpace
        | Code::Char('\t' | '\r' | '\n' | ' ') => GroupKind::Whitespace,
        // Unicode whitespace.
        Code::Char(char) if char.is_whitespace() => GroupKind::Whitespace,
        // Unicode punctuation.
        // To do: `is_punctuation` is not in rust? Why not?
        // Perhaps we need to generate stuff just like:
        // .
        Code::Char(char) if char.is_ascii_punctuation() => GroupKind::Punctuation,
        Code::Char(_) => GroupKind::Other,
    }
}
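
// A minimal sanity check for `classify_character`, sketched as a unit test;
// it only assumes the `Code` variants already used above.
#[cfg(test)]
mod tests {
    use super::{classify_character, GroupKind};
    use crate::tokenizer::Code;

    #[test]
    fn classify_character_examples() {
        // No character at all counts as whitespace.
        assert_eq!(classify_character(Code::None), GroupKind::Whitespace);
        // Markdown whitespace.
        assert_eq!(classify_character(Code::Char(' ')), GroupKind::Whitespace);
        // ASCII punctuation.
        assert_eq!(classify_character(Code::Char('.')), GroupKind::Punctuation);
        // Everything else.
        assert_eq!(classify_character(Code::Char('a')), GroupKind::Other);
    }
}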