aboutsummaryrefslogblamecommitdiffstats
path: root/src/parser.rs
blob: 4e8d8590642f717f414f0c84785a1e3967dc279b (plain) (tree)
1
2
3
4
5
6
7
8
9
                                       
 
                                 


                                             
                                    
                        
                                           
 
                                                             

                                                          
                                                       
                
                           
                      

                                   
                                  
                      
                        
                                              
                                 

                                                           

 
                                          
   
                                                                


                              


                                                   
                                      
                





                                                                                                
                            
                                         

      






                                                            
 




                                              
                                                   

                                      
          



                                                                
 


                        

                                                                
     
 
                             
 
//! Turn bytes of markdown into events.

use crate::event::{Event, Point};
use crate::state::{Name as StateName, State};
use crate::subtokenize::subtokenize;
use crate::tokenizer::Tokenizer;
use crate::util::location::Location;
use crate::ParseOptions;
use alloc::{string::String, vec, vec::Vec};

/// Info needed, in all content types, when parsing markdown.
///
/// Importantly, this contains a set of known definitions.
/// It also references the input value as bytes (`u8`).
#[derive(Debug)]
pub struct ParseState<'a> {
    /// Configuration.
    pub location: Option<Location>,
    /// Configuration.
    pub options: &'a ParseOptions,
    /// List of chars.
    pub bytes: &'a [u8],
    /// Set of defined definition identifiers.
    pub definitions: Vec<String>,
    /// Set of defined GFM footnote definition identifiers.
    pub gfm_footnote_definitions: Vec<String>,
}

/// Turn a string of markdown into events.
///
/// Passes the bytes back so the compiler can access the source.
pub fn parse<'a>(
    value: &'a str,
    options: &'a ParseOptions,
) -> Result<(Vec<Event>, ParseState<'a>), String> {
    let bytes = value.as_bytes();

    let mut parse_state = ParseState {
        options,
        bytes,
        location: if options.mdx_esm_parse.is_some() || options.mdx_expression_parse.is_some() {
            Some(Location::new(bytes))
        } else {
            None
        },
        definitions: vec![],
        gfm_footnote_definitions: vec![],
    };

    let start = Point {
        line: 1,
        column: 1,
        index: 0,
        vs: 0,
    };
    let mut tokenizer = Tokenizer::new(start, &parse_state);

    let state = tokenizer.push(
        (0, 0),
        (parse_state.bytes.len(), 0),
        State::Next(StateName::DocumentStart),
    );
    let mut result = tokenizer.flush(state, true)?;
    let mut events = tokenizer.events;

    loop {
        let fn_defs = &mut parse_state.gfm_footnote_definitions;
        let defs = &mut parse_state.definitions;
        fn_defs.append(&mut result.gfm_footnote_definitions);
        defs.append(&mut result.definitions);

        if result.done {
            break;
        }

        result = subtokenize(&mut events, &parse_state, &None)?;
    }

    Ok((events, parse_state))
}