aboutsummaryrefslogblamecommitdiffstats
path: root/src/construct/partial_title.rs
blob: 80861afd76169c214f48a862f176acf7378766e1 (plain) (tree)
1
                                                              

















                                                                               

                                                             






                                                                                                                                                           

                                                                
                                            
 
                                                                             
                             
                        
                                                      
 





                                                   
                     
                             
                      
                                               
                      

 
                  
                           

                                               



                   
           
           

                                       



                   
           
           
           
                                       



                   
           
           


           
           
                                         
       
                                                                   
                             
                    


                                  

         
                                       
       
                                                                         


                 






                                                 
         


     


                                 

                                  





                      


                   

           
       

                                                                    
                                                              
                             
                               
                                            



                                                         
                                
                                                        
                                                   
         
                        


     




                                                 

           
       

                                                          
                                                      
                                                         
                                

                                                        
                     

              
                                                         
                                     



         


                                        

           
       

                                                                 
                                                      
                                                        
                                  
         
                           
                                    
                                                      


                                                        
                 
                                    
                                 
              
                     
              
                                                                                 
 
                             
                                                       
                                                   
                    
                                    
             
 
                                  



         


                  

           
       

                                                          
                                                      
                                        
                                     
         
                               
                                        
                                     
         
                        

                                                    

              

                                                   



         


                             

              
       

                                                           
                                                      

                                                   
         
                                    

     
//! Title occurs in [definition][] and [label end][label_end].
//!
//! They’re formed with the following BNF:
//!
//! ```bnf
//! ; Restriction: no blank lines.
//! ; Restriction: markers must match (in case of `(` with `)`).
//! title ::= marker [ *( code - '\\' | '\\' [ marker ] ) ] marker
//! marker ::= '"' | '\'' | '('
//! ```
//!
//! Titles can be double quoted (`"a"`), single quoted (`'a'`), or
//! parenthesized (`(a)`).
//!
//! Titles can contain line endings and whitespace, but they are not allowed to
//! contain blank lines.
//! They are allowed to be blank themselves.
//!
//! The title is interpreted as the [string][] content type.
//! That means that [character escapes][character_escape] and
//! [character references][character_reference] are allowed.
//!
//! ## References
//!
//! *   [`micromark-factory-title/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-title/dev/index.js)
//!
//! [definition]: crate::construct::definition
//! [string]: crate::content::string
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [label_end]: crate::construct::label_end

use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::subtokenize::link;
use crate::token::Token;
use crate::tokenizer::{ContentType, State, Tokenizer};

/// Configuration.
///
/// The title parser does not choose its own token types: the caller decides
/// what the emitted events are called.
/// You must pass the token types in that are used.
#[derive(Debug)]
pub struct Options {
    /// Token for the whole title.
    ///
    /// Entered before the opening marker and exited after the closing marker.
    pub title: Token,
    /// Token for the marker.
    ///
    /// Wraps the single opening byte and, separately, the single closing byte.
    pub marker: Token,
    /// Token for the string inside the quotes.
    ///
    /// Not emitted when the closing marker directly follows the opening
    /// marker (an empty title such as `""`).
    pub string: Token,
}

/// Which pair of delimiters wraps the title.
#[derive(Debug, PartialEq)]
enum Kind {
    /// Title wrapped in parens: opened with `(`, closed with `)`.
    ///
    /// ## Example
    ///
    /// ```markdown
    /// (a)
    /// ```
    Paren,
    /// Title wrapped in double quotes (`"` on both sides).
    ///
    /// ## Example
    ///
    /// ```markdown
    /// "a"
    /// ```
    Double,
    /// Title wrapped in single quotes (`'` on both sides).
    ///
    /// ## Example
    ///
    /// ```markdown
    /// 'a'
    /// ```
    Single,
}

impl Kind {
    /// Map an *opening* marker byte ([u8]) to its kind.
    ///
    /// > 👉 **Note**: `Kind::Paren` is produced by the opening paren (`(`),
    /// > not the closing one.
    ///
    /// ## Panics
    ///
    /// Panics if `byte` is anything other than `(`, `"`, or `'`.
    fn from_byte(byte: u8) -> Self {
        match byte {
            b'"' => Self::Double,
            b'\'' => Self::Single,
            b'(' => Self::Paren,
            _ => unreachable!("invalid byte"),
        }
    }

    /// Get the *closing* marker byte ([u8]) of this kind.
    ///
    /// > 👉 **Note**: `Kind::Paren` yields the closing paren (`)`), so this is
    /// > not the inverse of `from_byte` for parenthesized titles.
    fn as_byte(&self) -> u8 {
        match *self {
            Self::Double => b'"',
            Self::Single => b'\'',
            Self::Paren => b')',
        }
    }
}

/// State needed to parse titles.
///
/// Created once in `start` and then handed by value from state to state.
#[derive(Debug)]
struct Info {
    /// Whether we’ve seen data.
    ///
    /// Starts out `false`; switched to `true` after the first data chunk or
    /// line ending inside the title.
    /// While `true`, each new data chunk is linked to the previous event, and
    /// the flag is also forwarded to the line ending parser as its `connect`
    /// option.
    connect: bool,
    /// Kind of title.
    ///
    /// Derived from the opening marker; decides which byte closes the title.
    kind: Kind,
    /// Configuration.
    ///
    /// The token types to use for the events that are emitted.
    options: Options,
}

/// Start of a title, at the opening marker.
///
/// Accepts `"`, `'`, or `(`: opens the title token, emits the opening marker,
/// and continues in `begin`.
/// Any other byte (or the end of the input) is not a title (`State::Nok`).
///
/// ```markdown
/// > | "a"
///     ^
/// ```
pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
    match tokenizer.current {
        Some(byte @ (b'"' | b'\'' | b'(')) => {
            let kind = Kind::from_byte(byte);

            tokenizer.enter(options.title.clone());
            tokenizer.enter(options.marker.clone());
            tokenizer.consume();
            tokenizer.exit(options.marker.clone());

            // Nothing has been seen inside the title yet, so there is nothing
            // to connect to.
            let info = Info {
                connect: false,
                kind,
                options,
            };
            State::Fn(Box::new(move |t| begin(t, info)))
        }
        _ => State::Nok,
    }
}

/// Directly after the opening marker.
///
/// `at_break` also comes back here once it is at the closing marker, so this
/// is the one place where the title is closed: the closing marker is emitted
/// and the title token is exited.
/// On anything else, the string token is opened and parsing continues in
/// `at_break`.
///
/// ```markdown
/// > | "a"
///      ^
/// ```
fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {
    let closing = info.kind.as_byte();

    if tokenizer.current == Some(closing) {
        tokenizer.enter(info.options.marker.clone());
        tokenizer.consume();
        tokenizer.exit(info.options.marker.clone());
        tokenizer.exit(info.options.title);
        State::Ok
    } else {
        tokenizer.enter(info.options.string.clone());
        at_break(tokenizer, info)
    }
}

/// At something, before something else.
///
/// ```markdown
/// > | "a"
///      ^
/// ```
fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
    match tokenizer.current {
        Some(byte) if byte == info.kind.as_byte() => {
            tokenizer.exit(info.options.string.clone());
            begin(tokenizer, info)
        }
        None => State::Nok,
        Some(b'\n') => tokenizer.go(
            space_or_tab_eol_with_options(EolOptions {
                content_type: Some(ContentType::String),
                connect: info.connect,
            }),
            |t| {
                info.connect = true;
                at_break(t, info)
            },
        )(tokenizer),
        _ => {
            tokenizer.enter_with_content(Token::Data, Some(ContentType::String));

            if info.connect {
                let index = tokenizer.events.len() - 1;
                link(&mut tokenizer.events, index);
            } else {
                info.connect = true;
            }

            title(tokenizer, info)
        }
    }
}

/// Inside a data chunk of the title.
///
/// A backslash is consumed and handed to `escape`; every other byte that
/// does not end the chunk is consumed as data.
/// The chunk ends at the closing marker, at a line ending, and at the end of
/// the input: the data token is exited and `at_break` decides what is next.
///
/// ```markdown
/// > | "a"
///      ^
/// ```
fn title(tokenizer: &mut Tokenizer, info: Info) -> State {
    match tokenizer.current {
        Some(b'\\') => {
            tokenizer.consume();
            State::Fn(Box::new(move |t| escape(t, info)))
        }
        Some(byte) if byte != b'\n' && byte != info.kind.as_byte() => {
            tokenizer.consume();
            State::Fn(Box::new(move |t| title(t, info)))
        }
        // Closing marker, line ending, or end of input.
        _ => {
            tokenizer.exit(Token::Data);
            at_break(tokenizer, info)
        }
    }
}

/// After `\`, in title text.
///
/// A backslash can escape:
///
/// *   the closing marker, so that it does not end the title
/// *   another backslash, so that the second backslash cannot in turn escape
///     what follows: `"a\\"` is a complete title, because its closing `"` is
///     preceded by an escaped backslash and is not itself escaped
///
/// In both cases the escaped byte is consumed as part of the data.
/// Anything else is not consumed here but handled by `title` as regular text.
///
/// ```markdown
/// > | "a\"b"
///        ^
/// ```
fn escape(tokenizer: &mut Tokenizer, info: Info) -> State {
    match tokenizer.current {
        Some(byte) if byte == info.kind.as_byte() || byte == b'\\' => {
            tokenizer.consume();
            State::Fn(Box::new(|t| title(t, info)))
        }
        _ => title(tokenizer, info),
    }
}