aboutsummaryrefslogblamecommitdiffstats
path: root/src/construct/partial_label.rs
blob: 255fde19199fd51d19e7504c810b536c6a402224 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
                                                              
   



                                                                            



                                                                



                                                     









                                                                               

                                                             















                                                                                  



                                                                       
                                               





                                                                                                                                                           
                                      
                                            

                                                                


                                                            
                                          
                                                                             
 
                                                                                         
                                        
                                             
                             
                                
                                                   
 
                   

               

           
       
                                                  
                             
                       

                                                                      
                                
                                                                     
                                                 
         
                        


     
























                                                                          
                                                  

               

           
       
                                                     
                                                              
                                                         
                                                                                      
     


                                                 


                                 
                            




                                                             
                              
                             
                                                       

                                                                  
                  
             
                           

                                                                          
                                    




                                                                         
                         
             
                  







                                                 
 
                                                     


                                                           
                                                            
                 
 
                                                    
             



         
                               





               
                                                      
                                            
                                         

 
                            






               
                                                          




                                             
                      

               

           
       
                                                   
                             
                                             

                                                 
         
                       
                                                                        

                                                     
                    
                                    


                                                                                    
                 
                                                      
                                          
                        
                                          
                  
             



         
                                      

               

              
       
                                                   
                             
                                      
                                
                                               
                                               
         
                                                  

     
//! Label occurs in [definition][] and [label end][label_end].
//!
//! ## Grammar
//!
//! Label forms with the following BNF
//! (<small>see [construct][crate::construct] for character groups</small>):
//!
//! ```bnf
//! ; Restriction: maximum `999` codes allowed between brackets.
//! ; Restriction: no blank lines.
//! ; Restriction: at least 1 `text` byte must exist.
//! label ::= '[' *(label_byte | label_escape) ']'
//! label_byte ::= code - '[' - '\\' - ']'
//! label_escape ::= '\\' ['[' | '\\' | ']']
//! ```
//!
//! The maximum allowed size of the label, without the brackets, is `999`
//! (inclusive), which is defined in
//! [`LINK_REFERENCE_SIZE_MAX`][link_reference_size_max].
//!
//! Labels can contain line endings and whitespace, but they are not allowed to
//! contain blank lines, and they must not be blank themselves.
//!
//! The label is interpreted as the [string][] content type.
//! That means that [character escapes][character_escape] and
//! [character references][character_reference] are allowed.
//!
//! > 👉 **Note**: this label relates to, but is not, the initial “label” of
//! > what is know as a reference in markdown:
//! >
//! > | Kind      | Link     | Image     |
//! > | --------- | -------- | --------- |
//! > | Shortcut  | `[x]`    | `![x]`    |
//! > | Collapsed | `[x][]`  | `![x][]`  |
//! > | Full      | `[x][y]` | `![x][y]` |
//! >
//! > The 6 above things are references, in the three kinds they come in, as
//! > links and images.
//! > The label that this module focusses on is only the thing that contains
//! > `y`.
//! >
//! > The thing that contains `x` is not a single thing when parsing markdown,
//! > but instead constists of an opening
//! > ([label start (image)][label_start_image] or
//! > [label start (link)][label_start_link]) and a closing
//! > ([label end][label_end]), so as to allow further phrasing such as
//! > [code (text)][raw_text] or [attention][].
//!
//! ## References
//!
//! *   [`micromark-factory-label/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-label/dev/index.js)
//!
//! [definition]: crate::construct::definition
//! [string]: crate::construct::string
//! [attention]: crate::construct::attention
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [label_start_image]: crate::construct::label_start_image
//! [label_start_link]: crate::construct::label_start_link
//! [label_end]: crate::construct::label_end
//! [raw_text]: crate::construct::raw_text
//! [link_reference_size_max]: crate::util::constant::LINK_REFERENCE_SIZE_MAX

use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options};
use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
use crate::util::constant::LINK_REFERENCE_SIZE_MAX;

/// Start of label.
///
/// ```markdown
/// > | [a]
///     ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'[') => {
            tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
            tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
            tokenizer.consume();
            tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
            State::Next(StateName::LabelAtMarker)
        }
        _ => State::Nok,
    }
}

/// At an optional extra marker.
///
/// Used for footnotes.
///
/// ```markdown
/// > | [^a]
///      ^
/// ```
pub fn at_marker(tokenizer: &mut Tokenizer) -> State {
    // For footnotes (and potentially other custom things in the future),
    // We need to make sure there is a certain marker after `[`.
    if tokenizer.tokenize_state.marker == 0 {
        tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
        State::Retry(StateName::LabelAtBreak)
    } else if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
        tokenizer.enter(tokenizer.tokenize_state.token_4.clone());
        tokenizer.consume();
        tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
        tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
        State::Next(StateName::LabelAtBreak)
    } else {
        State::Nok
    }
}

/// In label, at something, before something else.
///
/// ```markdown
/// > | [a]
///      ^
/// ```
pub fn at_break(tokenizer: &mut Tokenizer) -> State {
    if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX
        || matches!(tokenizer.current, None | Some(b'['))
        || (matches!(tokenizer.current, Some(b']')) && !tokenizer.tokenize_state.seen)
    {
        tokenizer.tokenize_state.connect = false;
        tokenizer.tokenize_state.seen = false;
        tokenizer.tokenize_state.size = 0;
        State::Nok
    } else {
        match tokenizer.current {
            Some(b'\n') => {
                tokenizer.attempt(
                    State::Next(StateName::LabelEolAfter),
                    State::Next(StateName::LabelAtBlankLine),
                );
                State::Retry(space_or_tab_eol_with_options(
                    tokenizer,
                    Options {
                        content: Some(Content::String),
                        connect: tokenizer.tokenize_state.connect,
                    },
                ))
            }
            Some(b']') => {
                tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
                tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
                tokenizer.consume();
                tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
                tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
                tokenizer.tokenize_state.connect = false;
                tokenizer.tokenize_state.seen = false;
                tokenizer.tokenize_state.size = 0;
                State::Ok
            }
            _ => {
                tokenizer.enter_link(
                    Name::Data,
                    Link {
                        previous: None,
                        next: None,
                        content: Content::String,
                    },
                );

                if tokenizer.tokenize_state.connect {
                    let index = tokenizer.events.len() - 1;
                    link(&mut tokenizer.events, index);
                } else {
                    tokenizer.tokenize_state.connect = true;
                }

                State::Retry(StateName::LabelInside)
            }
        }
    }
}

/// In label, after whitespace.
///
/// ```markdown
///   | [a␊
/// > | b]
///     ^
/// ```
pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
    tokenizer.tokenize_state.connect = true;
    State::Retry(StateName::LabelAtBreak)
}

/// In label, at blank line.
///
/// ```markdown
///   | [a␊
/// > | ␊
///     ^
///   | b]
/// ```
pub fn at_blank_line(tokenizer: &mut Tokenizer) -> State {
    tokenizer.tokenize_state.marker = 0;
    tokenizer.tokenize_state.connect = false;
    State::Nok
}

/// In label, in text.
///
/// ```markdown
/// > | [a]
///      ^
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        None | Some(b'\n' | b'[' | b']') => {
            tokenizer.exit(Name::Data);
            State::Retry(StateName::LabelAtBreak)
        }
        Some(byte) => {
            if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX {
                tokenizer.exit(Name::Data);
                State::Retry(StateName::LabelAtBreak)
            } else {
                tokenizer.consume();
                tokenizer.tokenize_state.size += 1;
                if !tokenizer.tokenize_state.seen && !matches!(byte, b'\t' | b' ') {
                    tokenizer.tokenize_state.seen = true;
                }
                State::Next(if matches!(byte, b'\\') {
                    StateName::LabelEscape
                } else {
                    StateName::LabelInside
                })
            }
        }
    }
}

/// After `\`, at a special character.
///
/// ```markdown
/// > | [a\*a]
///        ^
/// ```
pub fn escape(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        Some(b'[' | b'\\' | b']') => {
            tokenizer.consume();
            tokenizer.tokenize_state.size += 1;
            State::Next(StateName::LabelInside)
        }
        _ => State::Retry(StateName::LabelInside),
    }
}