about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-09-28 17:54:39 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-09-28 17:55:44 +0200
commit: b33a81e40620b8b3eaeeec9d0e0b34ca5958dead (patch)
tree: c91e56db38777b30cdcef591d0f7cd9bd1ac0ee8 /src
parent: a0c84c505d733be2e987a333a34244c1befb56cb (diff)
download: markdown-rs-b33a81e40620b8b3eaeeec9d0e0b34ca5958dead.tar.gz
markdown-rs-b33a81e40620b8b3eaeeec9d0e0b34ca5958dead.tar.bz2
markdown-rs-b33a81e40620b8b3eaeeec9d0e0b34ca5958dead.zip
Add support for turning mdast to hast
Diffstat (limited to 'src')
-rw-r--r-- src/construct/attention.rs 5
-rw-r--r-- src/construct/gfm_table.rs 6
-rw-r--r-- src/construct/heading_atx.rs 6
-rw-r--r-- src/construct/heading_setext.rs 6
-rw-r--r-- src/construct/label_end.rs 4
-rw-r--r-- src/construct/list_item.rs 6
-rw-r--r-- src/construct/partial_data.rs 6
-rw-r--r-- src/construct/partial_mdx_expression.rs 2
-rw-r--r-- src/construct/string.rs 6
-rw-r--r-- src/construct/text.rs 5
-rw-r--r-- src/lib.rs 12
-rw-r--r-- src/mdast.rs 77
-rw-r--r-- src/resolve.rs 24
-rw-r--r-- src/to_mdast.rs 6
-rw-r--r-- src/unist.rs 75
-rw-r--r-- src/util/sanitize_uri.rs 1
16 files changed, 130 insertions, 117 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 4d58610..d99a52c 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -88,7 +88,6 @@ use crate::util::{
},
slice::Slice,
};
-use alloc::string::String;
use alloc::{vec, vec::Vec};
/// Attention sequence that we can take markers from.
@@ -152,7 +151,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve sequences.
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
// Find all sequences, gather info about them.
let mut sequences = get_sequences(tokenizer);
@@ -224,7 +223,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
tokenizer.map.consume(&mut tokenizer.events);
- Ok(None)
+ None
}
/// Get sequences.
diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs
index 63772c4..547358f 100644
--- a/src/construct/gfm_table.rs
+++ b/src/construct/gfm_table.rs
@@ -232,7 +232,7 @@ use crate::state::{Name as StateName, State};
use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::{constant::TAB_SIZE, skip::opt_back as skip_opt_back};
-use alloc::{string::String, vec};
+use alloc::vec;
/// Start of a GFM table.
///
@@ -772,7 +772,7 @@ pub fn body_row_escape(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve GFM table.
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
let mut index = 0;
let mut in_first_cell_awaiting_pipe = true;
let mut in_row = false;
@@ -887,7 +887,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
flush_table_end(tokenizer, last_table_end, last_table_has_body);
}
- Ok(None)
+ None
}
/// Generate a cell.
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index b76e455..c867117 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -69,7 +69,7 @@ use crate::state::{Name as StateName, State};
use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
-use alloc::{string::String, vec};
+use alloc::vec;
/// Start of a heading (atx).
///
@@ -223,7 +223,7 @@ pub fn data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve heading (atx).
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
let mut index = 0;
let mut heading_inside = false;
let mut data_start: Option<usize> = None;
@@ -283,5 +283,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
index += 1;
}
- Ok(None)
+ None
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 3a484e1..1e6fd00 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -77,7 +77,7 @@ use crate::state::{Name as StateName, State};
use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
use crate::util::{constant::TAB_SIZE, skip};
-use alloc::{string::String, vec};
+use alloc::vec;
/// At start of heading (setext) underline.
///
@@ -184,7 +184,7 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve heading (setext).
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
tokenizer.map.consume(&mut tokenizer.events);
let mut enter = skip::to(&tokenizer.events, 0, &[Name::HeadingSetextUnderline]);
@@ -281,5 +281,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
tokenizer.map.consume(&mut tokenizer.events);
- Ok(None)
+ None
}
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 95b9a27..ca71245 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -661,7 +661,7 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
///
/// This turns matching label starts and label ends into links, images, and
/// footnotes, and turns unmatched label starts back into data.
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
// Inject labels.
let labels = tokenizer.tokenize_state.labels.split_off(0);
inject_labels(tokenizer, &labels);
@@ -673,7 +673,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
tokenizer.map.consume(&mut tokenizer.events);
- Ok(None)
+ None
}
/// Inject links/images/footnotes.
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 13b740b..a4f166d 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -69,7 +69,7 @@ use crate::util::{
skip,
slice::{Position, Slice},
};
-use alloc::{string::String, vec, vec::Vec};
+use alloc::{vec, vec::Vec};
/// Start of list item.
///
@@ -371,7 +371,7 @@ pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
}
/// Find adjacent list items with the same marker.
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
let mut index = 0;
@@ -474,5 +474,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
index += 1;
}
- Ok(None)
+ None
}
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index b36d9f0..a27730c 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -10,7 +10,7 @@ use crate::event::{Kind, Name};
use crate::state::{Name as StateName, State};
use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
-use alloc::{string::String, vec};
+use alloc::vec;
/// At beginning of data.
///
@@ -73,7 +73,7 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
}
/// Merge adjacent data events.
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
let mut index = 0;
// Loop through events and merge adjacent data events.
@@ -105,5 +105,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
index += 1;
}
- Ok(None)
+ None
}
diff --git a/src/construct/partial_mdx_expression.rs b/src/construct/partial_mdx_expression.rs
index 3ebd0f0..789443e 100644
--- a/src/construct/partial_mdx_expression.rs
+++ b/src/construct/partial_mdx_expression.rs
@@ -219,7 +219,7 @@ fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> St
};
// Parse and handle what was signaled back.
- match parse(&result.value, kind) {
+ match parse(&result.value, &kind) {
MdxSignal::Ok => State::Ok,
MdxSignal::Error(message, place) => {
let point = place_to_point(&result, place);
diff --git a/src/construct/string.rs b/src/construct/string.rs
index cf2f222..cad570d 100644
--- a/src/construct/string.rs
+++ b/src/construct/string.rs
@@ -17,7 +17,6 @@ use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
-use alloc::string::String;
/// Characters that can start something in string.
const MARKERS: [u8; 2] = [b'&', b'\\'];
@@ -76,8 +75,7 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve whitespace in string.
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
resolve_whitespace(tokenizer, false, false);
-
- Ok(None)
+ None
}
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 2648531..0ea0913 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -30,7 +30,6 @@ use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::subtokenize::Subresult;
use crate::tokenizer::Tokenizer;
-use alloc::string::String;
/// Characters that can start something in text.
const MARKERS: [u8; 16] = [
@@ -244,7 +243,7 @@ pub fn before_data(tokenizer: &mut Tokenizer) -> State {
}
/// Resolve whitespace.
-pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
resolve_whitespace(
tokenizer,
tokenizer.parse_state.options.constructs.hard_break_trailing,
@@ -260,5 +259,5 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
resolve_gfm_autolink_literal(tokenizer);
}
- Ok(None)
+ None
}
diff --git a/src/lib.rs b/src/lib.rs
index fcdab10..e552327 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -17,7 +17,7 @@ extern crate alloc;
mod construct;
mod event;
-pub mod mdast;
+pub mod mdast; // To do: externalize?
mod parser;
mod resolve;
mod state;
@@ -25,6 +25,7 @@ mod subtokenize;
mod to_html;
mod to_mdast;
mod tokenizer;
+pub mod unist; // To do: externalize.
mod util;
use alloc::{boxed::Box, fmt, string::String};
@@ -32,6 +33,7 @@ use mdast::Node;
use parser::parse;
use to_html::compile as to_html;
use to_mdast::compile as to_mdast;
+use util::sanitize_uri::sanitize;
/// Type of line endings in markdown.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
@@ -146,7 +148,7 @@ pub enum MdxExpressionKind {
/// Can be passed as `mdx_expression_parse` in [`Options`][] to support
/// expressions according to a certain grammar (typically, a programming
/// language).
-pub type MdxExpressionParse = dyn Fn(&str, MdxExpressionKind) -> MdxSignal;
+pub type MdxExpressionParse = dyn Fn(&str, &MdxExpressionKind) -> MdxSignal;
/// Signature of a function that parses ESM.
///
@@ -1187,3 +1189,9 @@ pub fn micromark_to_mdast(value: &str, options: &Options) -> Result<Node, String
let node = to_mdast(&events, bytes)?;
Ok(node)
}
+
+/// Do not use: exported for quick prototyping, will be removed.
+#[must_use]
+pub fn sanitize_(value: &str) -> String {
+ sanitize(value)
+}
diff --git a/src/mdast.rs b/src/mdast.rs
index 79a39dd..8b5b74d 100644
--- a/src/mdast.rs
+++ b/src/mdast.rs
@@ -1,83 +1,14 @@
-//! [mdast][] syntax tree.
+//! markdown syntax tree: [mdast][].
//!
//! [mdast]: https://github.com/syntax-tree/mdast
+use crate::unist::Position;
use alloc::{
fmt,
string::{String, ToString},
vec::Vec,
};
-/// One place in a source file.
-#[derive(Clone, Eq, PartialEq)]
-pub struct Point {
- /// 1-indexed integer representing a line in a source file.
- pub line: usize,
- /// 1-indexed integer representing a column in a source file.
- pub column: usize,
- /// 0-indexed integer representing a character in a source file.
- pub offset: usize,
-}
-
-impl Point {
- #[must_use]
- pub fn new(line: usize, column: usize, offset: usize) -> Point {
- Point {
- line,
- column,
- offset,
- }
- }
-}
-
-impl fmt::Debug for Point {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- write!(f, "{}:{} ({})", self.line, self.column, self.offset)
- }
-}
-
-/// Location of a node in a source file.
-#[derive(Clone, Eq, PartialEq)]
-pub struct Position {
- /// Represents the place of the first character of the parsed source region.
- pub start: Point,
- /// Represents the place of the first character after the parsed source
- /// region, whether it exists or not.
- pub end: Point,
-}
-
-impl Position {
- #[must_use]
- pub fn new(
- start_line: usize,
- start_column: usize,
- start_offset: usize,
- end_line: usize,
- end_column: usize,
- end_offset: usize,
- ) -> Position {
- Position {
- start: Point::new(start_line, start_column, start_offset),
- end: Point::new(end_line, end_column, end_offset),
- }
- }
-}
-
-impl fmt::Debug for Position {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- write!(
- f,
- "{}:{}-{}:{} ({}-{})",
- self.start.line,
- self.start.column,
- self.end.line,
- self.end.column,
- self.start.offset,
- self.end.offset
- )
- }
-}
-
/// Explicitness of a reference.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ReferenceKind {
@@ -370,7 +301,8 @@ impl Node {
}
}
- pub fn position(&mut self) -> Option<&Position> {
+ #[must_use]
+ pub fn position(&self) -> Option<&Position> {
match self {
Node::Root(x) => x.position.as_ref(),
Node::BlockQuote(x) => x.position.as_ref(),
@@ -1204,6 +1136,7 @@ pub struct MdxJsxAttribute {
#[cfg(test)]
mod tests {
use super::*;
+ use crate::unist::{Point, Position};
use alloc::{string::ToString, vec};
#[test]
diff --git a/src/resolve.rs b/src/resolve.rs
index 2586676..813ce52 100644
--- a/src/resolve.rs
+++ b/src/resolve.rs
@@ -64,18 +64,18 @@ pub enum Name {
/// Call the corresponding resolver.
pub fn call(tokenizer: &mut Tokenizer, name: Name) -> Result<Option<Subresult>, String> {
- let func = match name {
- Name::Label => construct::label_end::resolve,
- Name::Attention => construct::attention::resolve,
- Name::GfmTable => construct::gfm_table::resolve,
- Name::HeadingAtx => construct::heading_atx::resolve,
- Name::HeadingSetext => construct::heading_setext::resolve,
- Name::ListItem => construct::list_item::resolve,
- Name::Content => construct::content::resolve,
- Name::Data => construct::partial_data::resolve,
- Name::String => construct::string::resolve,
- Name::Text => construct::text::resolve,
+ let result = match name {
+ Name::Label => construct::label_end::resolve(tokenizer),
+ Name::Attention => construct::attention::resolve(tokenizer),
+ Name::GfmTable => construct::gfm_table::resolve(tokenizer),
+ Name::HeadingAtx => construct::heading_atx::resolve(tokenizer),
+ Name::HeadingSetext => construct::heading_setext::resolve(tokenizer),
+ Name::ListItem => construct::list_item::resolve(tokenizer),
+ Name::Content => construct::content::resolve(tokenizer)?,
+ Name::Data => construct::partial_data::resolve(tokenizer),
+ Name::String => construct::string::resolve(tokenizer),
+ Name::Text => construct::text::resolve(tokenizer),
};
- func(tokenizer)
+ Ok(result)
}
diff --git a/src/to_mdast.rs b/src/to_mdast.rs
index 9f03a03..42f68a0 100644
--- a/src/to_mdast.rs
+++ b/src/to_mdast.rs
@@ -5,10 +5,10 @@ use crate::mdast::{
AttributeContent, AttributeValue, BlockQuote, Break, Code, Definition, Delete, Emphasis,
FootnoteDefinition, FootnoteReference, Heading, Html, Image, ImageReference, InlineCode,
InlineMath, Link, LinkReference, List, ListItem, Math, MdxFlowExpression, MdxJsxAttribute,
- MdxJsxFlowElement, MdxJsxTextElement, MdxTextExpression, MdxjsEsm, Node, Paragraph, Point,
- Position, ReferenceKind, Root, Strong, Table, TableCell, TableRow, Text, ThematicBreak, Toml,
- Yaml,
+ MdxJsxFlowElement, MdxJsxTextElement, MdxTextExpression, MdxjsEsm, Node, Paragraph,
+ ReferenceKind, Root, Strong, Table, TableCell, TableRow, Text, ThematicBreak, Toml, Yaml,
};
+use crate::unist::{Point, Position};
use crate::util::{
decode_character_reference::{decode_named, decode_numeric},
infer::{gfm_table_align, list_item_loose, list_loose},
diff --git a/src/unist.rs b/src/unist.rs
new file mode 100644
index 0000000..75ef359
--- /dev/null
+++ b/src/unist.rs
@@ -0,0 +1,75 @@
+//! abstract syntax trees: [unist][].
+//!
+//! [unist]: https://github.com/syntax-tree/unist
+
+use alloc::fmt;
+
+/// One place in a source file.
+#[derive(Clone, Eq, PartialEq)]
+pub struct Point {
+ /// 1-indexed integer representing a line in a source file.
+ pub line: usize,
+ /// 1-indexed integer representing a column in a source file.
+ pub column: usize,
+ /// 0-indexed integer representing a character in a source file.
+ pub offset: usize,
+}
+
+impl Point {
+ #[must_use]
+ pub fn new(line: usize, column: usize, offset: usize) -> Point {
+ Point {
+ line,
+ column,
+ offset,
+ }
+ }
+}
+
+impl fmt::Debug for Point {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "{}:{} ({})", self.line, self.column, self.offset)
+ }
+}
+
+/// Location of a node in a source file.
+#[derive(Clone, Eq, PartialEq)]
+pub struct Position {
+ /// Represents the place of the first character of the parsed source region.
+ pub start: Point,
+ /// Represents the place of the first character after the parsed source
+ /// region, whether it exists or not.
+ pub end: Point,
+}
+
+impl Position {
+ #[must_use]
+ pub fn new(
+ start_line: usize,
+ start_column: usize,
+ start_offset: usize,
+ end_line: usize,
+ end_column: usize,
+ end_offset: usize,
+ ) -> Position {
+ Position {
+ start: Point::new(start_line, start_column, start_offset),
+ end: Point::new(end_line, end_column, end_offset),
+ }
+ }
+}
+
+impl fmt::Debug for Position {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(
+ f,
+ "{}:{}-{}:{} ({}-{})",
+ self.start.line,
+ self.start.column,
+ self.end.line,
+ self.end.column,
+ self.start.offset,
+ self.end.offset
+ )
+ }
+}
diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs
index 0099347..8e44758 100644
--- a/src/util/sanitize_uri.rs
+++ b/src/util/sanitize_uri.rs
@@ -26,6 +26,7 @@ use alloc::{
/// ## References
///
/// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
+#[must_use]
pub fn sanitize(value: &str) -> String {
encode(&*normalize(value), true)
}