From 0450e7c2b12bd3ef53e0cffb60a3dd860325b478 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Mon, 4 Jul 2022 15:21:11 +0200
Subject: Add support for unicode punctuation

---
 src/construct/attention.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'src/construct')

diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index d4541b4..f4bb841 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -1,6 +1,7 @@
 //! To do.
 
 use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
+use crate::unicode::PUNCTUATION;
 use crate::util::edit_map::EditMap;
 
 /// To do
@@ -421,10 +422,7 @@ fn classify_character(code: Code) -> GroupKind {
         // Unicode whitespace.
         Code::Char(char) if char.is_whitespace() => GroupKind::Whitespace,
         // Unicode punctuation.
-        // To do: `is_punctuation` is not in rust? Why not?
-        // Perhaps we need to generate stuff just like:
-        // .
-        Code::Char(char) if char.is_ascii_punctuation() => GroupKind::Punctuation,
+        Code::Char(char) if PUNCTUATION.contains(&char) => GroupKind::Punctuation,
         Code::Char(_) => GroupKind::Other,
     }
 }
-- 
cgit