From 187d51acc953720d79f40e82aabe90ea5d58a8a3 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 5 Jul 2022 15:10:54 +0200 Subject: Fix misnested attention --- src/construct/attention.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'src') diff --git a/src/construct/attention.rs b/src/construct/attention.rs index d460afb..d0689b8 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -328,6 +328,25 @@ fn resolve(tokenizer: &mut Tokenizer) -> Vec { 1 }; + // We’re *on* a closing sequence, with a matching opening + // sequence. + // Now we make sure that we can’t have misnested attention: + // + // ```html + // <em>a <strong>b</em> c</strong> + // ``` + // + // Do that by marking everything between it as no longer + // possible to open anything. + // Theoretically we could mark non-closing as well, but we + // don’t look for closers backwards. + let mut between = open + 1; + + while between < close { + sequences[between].open = false; + between += 1; + } + let sequence_close = &mut sequences[close]; let close_event_index = sequence_close.event_index; let seq_close_enter = ( -- cgit