From c16ce7361d4d976deaad42e783b7eae5d38ae763 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 14 Oct 2022 14:10:11 +0200 Subject: Refactor to move options to separate file --- src/configuration.rs | 1421 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1421 insertions(+) create mode 100644 src/configuration.rs (limited to 'src/configuration.rs') diff --git a/src/configuration.rs b/src/configuration.rs new file mode 100644 index 0000000..3e8823d --- /dev/null +++ b/src/configuration.rs @@ -0,0 +1,1421 @@ +use crate::util::{ + line_ending::LineEnding, + mdx::{EsmParse as MdxEsmParse, ExpressionParse as MdxExpressionParse}, +}; +use alloc::{boxed::Box, fmt, string::String}; + +/// Control which constructs are enabled. +/// +/// Not all constructs can be configured. +/// Notably, blank lines and paragraphs cannot be turned off. +/// +/// ## Examples +/// +/// ``` +/// use markdown::Constructs; +/// # fn main() { +/// +/// // Use the default trait to get `CommonMark` constructs: +/// let commonmark = Constructs::default(); +/// +/// // To turn on all of GFM, use the `gfm` method: +/// let gfm = Constructs::gfm(); +/// +/// // Or, mix and match: +/// let custom = Constructs { +/// math_flow: true, +/// math_text: true, +/// ..Constructs::gfm() +/// }; +/// # } +/// ``` +#[allow(clippy::struct_excessive_bools)] +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Constructs { + /// Attention. + /// + /// ```markdown + /// > | a *b* c **d**. + /// ^^^ ^^^^^ + /// ``` + pub attention: bool, + /// Autolink. + /// + /// ```markdown + /// > | a b . + /// ^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^ + /// ``` + pub autolink: bool, + /// Block quote. + /// + /// ```markdown + /// > | > a + /// ^^^ + /// ``` + pub block_quote: bool, + /// Character escape. + /// + /// ```markdown + /// > | a \* b + /// ^^ + /// ``` + pub character_escape: bool, + /// Character reference. 
+ /// + /// ```markdown + /// > | a &amp; b + /// ^^^^^ + /// ``` + pub character_reference: bool, + /// Code (indented). + /// + /// ```markdown + /// > | a + /// ^^^^^ + /// ``` + pub code_indented: bool, + /// Code (fenced). + /// + /// ```markdown + /// > | ~~~js + /// ^^^^^ + /// > | console.log(1) + /// ^^^^^^^^^^^^^^ + /// > | ~~~ + /// ^^^ + /// ``` + pub code_fenced: bool, + /// Code (text). + /// + /// ```markdown + /// > | a `b` c + /// ^^^ + /// ``` + pub code_text: bool, + /// Definition. + /// + /// ```markdown + /// > | [a]: b "c" + /// ^^^^^^^^^^ + /// ``` + pub definition: bool, + /// Frontmatter. + /// + /// ````markdown + /// > | --- + /// ^^^ + /// > | title: Neptune + /// ^^^^^^^^^^^^^^ + /// > | --- + /// ^^^ + /// ```` + pub frontmatter: bool, + /// GFM: autolink literal. + /// + /// ```markdown + /// > | https://example.com + /// ^^^^^^^^^^^^^^^^^^^ + /// ``` + pub gfm_autolink_literal: bool, + /// GFM: footnote definition. + /// + /// ```markdown + /// > | [^a]: b + /// ^^^^^^^ + /// ``` + pub gfm_footnote_definition: bool, + /// GFM: footnote label start. + /// + /// ```markdown + /// > | a[^b] + /// ^^ + /// ``` + pub gfm_label_start_footnote: bool, + /// GFM: strikethrough. + /// + /// ```markdown + /// > | a ~b~ c. + /// ^^^ + /// ``` + pub gfm_strikethrough: bool, + /// GFM: table. + /// + /// ```markdown + /// > | | a | + /// ^^^^^ + /// > | | - | + /// ^^^^^ + /// > | | b | + /// ^^^^^ + /// ``` + pub gfm_table: bool, + /// GFM: task list item. + /// + /// ```markdown + /// > | * [x] y. + /// ^^^ + /// ``` + pub gfm_task_list_item: bool, + /// Hard break (escape). + /// + /// ```markdown + /// > | a\ + /// ^ + /// | b + /// ``` + pub hard_break_escape: bool, + /// Hard break (trailing). + /// + /// ```markdown + /// > | a␠␠ + /// ^^ + /// | b + /// ``` + pub hard_break_trailing: bool, + /// Heading (atx). + /// + /// ```markdown + /// > | # a + /// ^^^ + /// ``` + pub heading_atx: bool, + /// Heading (setext). 
+ /// + /// ```markdown + /// > | a + /// ^^ + /// > | == + /// ^^ + /// ``` + pub heading_setext: bool, + /// HTML (flow). + /// + /// ```markdown + /// > | <div>
+ /// ^^^^^ + /// ``` + pub html_flow: bool, + /// HTML (text). + /// + /// ```markdown + /// > | a <b> c + /// ^^^ + /// ``` + pub html_text: bool, + /// Label start (image). + /// + /// ```markdown + /// > | a ![b](c) d + /// ^^ + /// ``` + pub label_start_image: bool, + /// Label start (link). + /// + /// ```markdown + /// > | a [b](c) d + /// ^ + /// ``` + pub label_start_link: bool, + /// Label end. + /// + /// ```markdown + /// > | a [b](c) d + /// ^^^^ + /// ``` + pub label_end: bool, + /// List items. + /// + /// ```markdown + /// > | * a + /// ^^^ + /// ``` + pub list_item: bool, + /// Math (flow). + /// + /// ```markdown + /// > | $$ + /// ^^ + /// > | \frac{1}{2} + /// ^^^^^^^^^^^ + /// > | $$ + /// ^^ + /// ``` + pub math_flow: bool, + /// Math (text). + /// + /// ```markdown + /// > | a $b$ c + /// ^^^ + /// ``` + pub math_text: bool, + /// MDX: ESM. + /// + /// ```markdown + /// > | import a from 'b' + /// ^^^^^^^^^^^^^^^^^ + /// ``` + /// + /// > 👉 **Note**: to support ESM, you *must* pass + /// > [`mdx_esm_parse`][MdxEsmParse] in [`ParseOptions`][] too. + /// > Otherwise, ESM is treated as normal markdown. + pub mdx_esm: bool, + /// MDX: expression (flow). + /// + /// ```markdown + /// > | {Math.PI} + /// ^^^^^^^^^ + /// ``` + /// + /// > 👉 **Note**: You *can* pass + /// > [`mdx_expression_parse`][MdxExpressionParse] in [`ParseOptions`][] + /// > too, to parse expressions according to a certain grammar (typically, + /// > a programming language). + /// > Otherwise, expressions are parsed with a basic algorithm that only + /// > cares about braces. + pub mdx_expression_flow: bool, + /// MDX: expression (text). + /// + /// ```markdown + /// > | a {Math.PI} c + /// ^^^^^^^^^ + /// ``` + /// + /// > 👉 **Note**: You *can* pass + /// > [`mdx_expression_parse`][MdxExpressionParse] in [`ParseOptions`][] + /// > too, to parse expressions according to a certain grammar (typically, + /// > a programming language). 
+ /// > Otherwise, expressions are parsed with a basic algorithm that only + /// > cares about braces. + pub mdx_expression_text: bool, + /// MDX: JSX (flow). + /// + /// ```markdown + /// > | <Component /> + /// ^^^^^^^^^^^^^ + /// ``` + /// + /// > 👉 **Note**: You *must* pass `html_flow: false` to use this, + /// > as `html_flow` takes precedence over `mdx_jsx_flow` when both are on. + /// + /// > 👉 **Note**: You *can* pass + /// > [`mdx_expression_parse`][MdxExpressionParse] in [`ParseOptions`][] + /// > too, to parse expressions in JSX according to a certain grammar + /// > (typically, a programming language). + /// > Otherwise, expressions are parsed with a basic algorithm that only + /// > cares about braces. + pub mdx_jsx_flow: bool, + /// MDX: JSX (text). + /// + /// ```markdown + /// > | a <Component /> c + /// ^^^^^^^^^^^^^ + /// ``` + /// + /// > 👉 **Note**: You *must* pass `html_text: false` to use this, + /// > as `html_text` takes precedence over `mdx_jsx_text` when both are on. + /// + /// > 👉 **Note**: You *can* pass + /// > [`mdx_expression_parse`][MdxExpressionParse] in [`ParseOptions`][] + /// > too, to parse expressions in JSX according to a certain grammar + /// > (typically, a programming language). + /// > Otherwise, expressions are parsed with a basic algorithm that only + /// > cares about braces. + pub mdx_jsx_text: bool, + /// Thematic break. + /// + /// ```markdown + /// > | *** + /// ^^^ + /// ``` + pub thematic_break: bool, +} + +impl Default for Constructs { + /// `CommonMark`. + /// + /// `CommonMark` is a relatively strong specification of how markdown + /// works. + /// Most markdown parsers try to follow it. + /// + /// For more information, see the `CommonMark` specification: + /// <https://spec.commonmark.org>. 
+ fn default() -> Self { + Self { + attention: true, + autolink: true, + block_quote: true, + character_escape: true, + character_reference: true, + code_indented: true, + code_fenced: true, + code_text: true, + definition: true, + frontmatter: false, + gfm_autolink_literal: false, + gfm_label_start_footnote: false, + gfm_footnote_definition: false, + gfm_strikethrough: false, + gfm_table: false, + gfm_task_list_item: false, + hard_break_escape: true, + hard_break_trailing: true, + heading_atx: true, + heading_setext: true, + html_flow: true, + html_text: true, + label_start_image: true, + label_start_link: true, + label_end: true, + list_item: true, + math_flow: false, + math_text: false, + mdx_esm: false, + mdx_expression_flow: false, + mdx_expression_text: false, + mdx_jsx_flow: false, + mdx_jsx_text: false, + thematic_break: true, + } + } +} + +impl Constructs { + /// GFM. + /// + /// GFM stands for **GitHub flavored markdown**. + /// GFM extends `CommonMark` and adds support for autolink literals, + /// footnotes, strikethrough, tables, and tasklists. + /// + /// For more information, see the GFM specification: + /// <https://github.github.com/gfm/>. + pub fn gfm() -> Self { + Self { + gfm_autolink_literal: true, + gfm_footnote_definition: true, + gfm_label_start_footnote: true, + gfm_strikethrough: true, + gfm_table: true, + gfm_task_list_item: true, + ..Self::default() + } + } + + /// MDX. + /// + /// This turns on `CommonMark`, turns off some conflicting constructs + /// (autolinks, code (indented), and HTML), and turns on MDX (ESM, + /// expressions, and JSX). + /// + /// For more information, see the MDX website: + /// <https://mdxjs.com>. + /// + /// > 👉 **Note**: to support ESM, you *must* pass + /// > [`mdx_esm_parse`][MdxEsmParse] in [`ParseOptions`][] too. + /// > Otherwise, ESM is treated as normal markdown. + /// > + /// > You *can* pass + /// > [`mdx_expression_parse`][MdxExpressionParse] + /// > to parse expressions according to a certain grammar (typically, a + /// > programming language). 
+ /// > Otherwise, expressions are parsed with a basic algorithm that only + /// > cares about braces. + pub fn mdx() -> Self { + Self { + autolink: false, + code_indented: false, + html_flow: false, + html_text: false, + mdx_esm: true, + mdx_expression_flow: true, + mdx_expression_text: true, + mdx_jsx_flow: true, + mdx_jsx_text: true, + ..Self::default() + } + } +} + +/// Configuration that describes how to compile to HTML. +/// +/// You likely either want to turn on the dangerous options +/// (`allow_dangerous_html`, `allow_dangerous_protocol`) when dealing with +/// input you trust, or want to customize how GFM footnotes are compiled +/// (typically because the input markdown is not in English). +/// +/// ## Examples +/// +/// ``` +/// use markdown::CompileOptions; +/// # fn main() { +/// +/// // Use the default trait to get safe defaults: +/// let safe = CompileOptions::default(); +/// +/// // Live dangerously / trust the author: +/// let danger = CompileOptions { +/// allow_dangerous_html: true, +/// allow_dangerous_protocol: true, +/// ..CompileOptions::default() +/// }; +/// +/// // In French: +/// let enFrançais = CompileOptions { +/// gfm_footnote_label: Some("Notes de bas de page".into()), +/// gfm_footnote_back_label: Some("Arrière".into()), +/// ..CompileOptions::default() +/// }; +/// # } +/// ``` +#[allow(clippy::struct_excessive_bools)] +#[derive(Clone, Debug, Default)] +pub struct CompileOptions { + /// Whether to allow (dangerous) HTML. + /// + /// The default is `false`, which still parses the HTML according to + /// `CommonMark` but shows the HTML as text instead of as elements. + /// + /// Pass `true` for trusted content to get actual HTML elements. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html, to_html_with_options, CompileOptions, Options}; + /// # fn main() -> Result<(), String> { + /// + /// // `markdown-rs` is safe by default: + /// assert_eq!( + /// to_html("Hi, venus!"), + /// "

Hi, <i>venus</i>!

" + /// ); + /// + /// // Turn `allow_dangerous_html` on to allow potentially dangerous HTML: + /// assert_eq!( + /// to_html_with_options( + /// "Hi, venus!", + /// &Options { + /// compile: CompileOptions { + /// allow_dangerous_html: true, + /// ..CompileOptions::default() + /// }, + /// ..Options::default() + /// } + /// )?, + /// "

Hi, venus!

" + /// ); + /// # Ok(()) + /// # } + /// ``` + pub allow_dangerous_html: bool, + + /// Whether to allow dangerous protocols in links and images. + /// + /// The default is `false`, which drops URLs in links and images that use + /// dangerous protocols. + /// + /// Pass `true` for trusted content to support all protocols. + /// + /// URLs that have no protocol (which means it’s relative to the current + /// page, such as `./some/page.html`) and URLs that have a safe protocol + /// (for images: `http`, `https`; for links: `http`, `https`, `irc`, + /// `ircs`, `mailto`, `xmpp`), are safe. + /// All other URLs are dangerous and dropped. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html, to_html_with_options, CompileOptions, Options}; + /// # fn main() -> Result<(), String> { + /// + /// // `markdown-rs` is safe by default: + /// assert_eq!( + /// to_html(""), + /// "

javascript:alert(1)

" + /// ); + /// + /// // Turn `allow_dangerous_protocol` on to allow potentially dangerous protocols: + /// assert_eq!( + /// to_html_with_options( + /// "", + /// &Options { + /// compile: CompileOptions { + /// allow_dangerous_protocol: true, + /// ..CompileOptions::default() + /// }, + /// ..Options::default() + /// } + /// )?, + /// "

javascript:alert(1)

" + /// ); + /// # Ok(()) + /// # } + /// ``` + pub allow_dangerous_protocol: bool, + + /// Default line ending to use when compiling to HTML, for line endings not + /// in `value`. + /// + /// Generally, `markdown-rs` copies line endings (`\r`, `\n`, `\r\n`) in + /// the markdown document over to the compiled HTML. + /// In some cases, such as `> a`, CommonMark requires that extra line + /// endings are added: `
\n

a

\n
`. + /// + /// To create that line ending, the document is checked for the first line + /// ending that is used. + /// If there is no line ending, `default_line_ending` is used. + /// If that isn’t configured, `\n` is used. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html, to_html_with_options, CompileOptions, LineEnding, Options}; + /// # fn main() -> Result<(), String> { + /// + /// // `markdown-rs` uses `\n` by default: + /// assert_eq!( + /// to_html("> a"), + /// "
\n

a

\n
" + /// ); + /// + /// // Define `default_line_ending` to configure the default: + /// assert_eq!( + /// to_html_with_options( + /// "> a", + /// &Options { + /// compile: CompileOptions { + /// default_line_ending: LineEnding::CarriageReturnLineFeed, + /// ..CompileOptions::default() + /// }, + /// ..Options::default() + /// } + /// )?, + /// "
\r\n

a

\r\n
" + /// ); + /// # Ok(()) + /// # } + /// ``` + pub default_line_ending: LineEnding, + + /// Textual label to use for the footnotes section. + /// + /// The default value is `"Footnotes"`. + /// Change it when the markdown is not in English. + /// + /// This label is typically hidden visually (assuming a `sr-only` CSS class + /// is defined that does that), and thus affects screen readers only. + /// If you do have such a class, but want to show this section to everyone, + /// pass different attributes with the `gfm_footnote_label_attributes` + /// option. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html_with_options, CompileOptions, Options, ParseOptions}; + /// # fn main() -> Result<(), String> { + /// + /// // `"Footnotes"` is used by default: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options::gfm() + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// + /// // Pass `gfm_footnote_label` to use something else: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// parse: ParseOptions::gfm(), + /// compile: CompileOptions { + /// gfm_footnote_label: Some("Notes de bas de page".into()), + /// ..CompileOptions::gfm() + /// } + /// } + /// )?, + /// "

1

\n

Notes de bas de page

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// # Ok(()) + /// # } + /// ``` + pub gfm_footnote_label: Option, + + /// HTML tag name to use for the footnote label element. + /// + /// The default value is `"h2"`. + /// Change it to match your document structure. + /// + /// This label is typically hidden visually (assuming a `sr-only` CSS class + /// is defined that does that), and thus affects screen readers only. + /// If you do have such a class, but want to show this section to everyone, + /// pass different attributes with the `gfm_footnote_label_attributes` + /// option. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html_with_options, CompileOptions, Options, ParseOptions}; + /// # fn main() -> Result<(), String> { + /// + /// // `"h2"` is used by default: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options::gfm() + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// + /// // Pass `gfm_footnote_label_tag_name` to use something else: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// parse: ParseOptions::gfm(), + /// compile: CompileOptions { + /// gfm_footnote_label_tag_name: Some("h1".into()), + /// ..CompileOptions::gfm() + /// } + /// } + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// # Ok(()) + /// # } + /// ``` + pub gfm_footnote_label_tag_name: Option, + + /// Attributes to use on the footnote label. + /// + /// The default value is `"class=\"sr-only\""`. + /// Change it to show the label and add other attributes. + /// + /// This label is typically hidden visually (assuming a `sr-only` CSS class + /// is defined that does that), and thus affects screen readers only. + /// If you do have such a class, but want to show this section to everyone, + /// pass an empty string. + /// You can also add different attributes. + /// + /// > 👉 **Note**: `id="footnote-label"` is always added, because footnote + /// > calls use it with `aria-describedby` to provide an accessible label. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html_with_options, CompileOptions, Options, ParseOptions}; + /// # fn main() -> Result<(), String> { + /// + /// // `"class=\"sr-only\""` is used by default: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options::gfm() + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// + /// // Pass `gfm_footnote_label_attributes` to use something else: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// parse: ParseOptions::gfm(), + /// compile: CompileOptions { + /// gfm_footnote_label_attributes: Some("class=\"footnote-heading\"".into()), + /// ..CompileOptions::gfm() + /// } + /// } + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// # Ok(()) + /// # } + /// ``` + pub gfm_footnote_label_attributes: Option, + + /// Textual label to describe the backreference back to footnote calls. + /// + /// The default value is `"Back to content"`. + /// Change it when the markdown is not in English. + /// + /// This label is used in the `aria-label` attribute on each backreference + /// (the `↩` links). + /// It affects users of assistive technology. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html_with_options, CompileOptions, Options, ParseOptions}; + /// # fn main() -> Result<(), String> { + /// + /// // `"Back to content"` is used by default: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options::gfm() + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// + /// // Pass `gfm_footnote_back_label` to use something else: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// parse: ParseOptions::gfm(), + /// compile: CompileOptions { + /// gfm_footnote_back_label: Some("Arrière".into()), + /// ..CompileOptions::gfm() + /// } + /// } + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// # Ok(()) + /// # } + /// ``` + pub gfm_footnote_back_label: Option, + + /// Prefix to use before the `id` attribute on footnotes to prevent them + /// from *clobbering*. + /// + /// The default is `"user-content-"`. + /// Pass `Some("".into())` for trusted markdown and when you are careful + /// with polyfilling. + /// You could pass a different prefix. + /// + /// DOM clobbering is this: + /// + /// ```html + ///

+ /// + /// ``` + /// + /// The above example shows that elements are made available by browsers, + /// by their ID, on the `window` object. + /// This is a security risk because you might be expecting some other + /// variable at that place. + /// It can also break polyfills. + /// Using a prefix solves these problems. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html_with_options, CompileOptions, Options, ParseOptions}; + /// # fn main() -> Result<(), String> { + /// + /// // `"user-content-"` is used by default: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options::gfm() + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// + /// // Pass `gfm_footnote_clobber_prefix` to use something else: + /// assert_eq!( + /// to_html_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// parse: ParseOptions::gfm(), + /// compile: CompileOptions { + /// gfm_footnote_clobber_prefix: Some("".into()), + /// ..CompileOptions::gfm() + /// } + /// } + /// )?, + /// "

1

\n

Footnotes

\n
    \n
  1. \n

    b

    \n
  2. \n
\n
\n" + /// ); + /// # Ok(()) + /// # } + /// ``` + pub gfm_footnote_clobber_prefix: Option, + + /// Whether to support the GFM tagfilter. + /// + /// This option does nothing if `allow_dangerous_html` is not turned on. + /// The default is `false`, which does not apply the GFM tagfilter to HTML. + /// Pass `true` for output that is a bit closer to GitHub’s actual output. + /// + /// The tagfilter is kinda weird and kinda useless. + /// The tag filter is a naïve attempt at XSS protection. + /// You should use a proper HTML sanitizing algorithm instead. + /// + /// ## Examples + /// + /// ``` + /// use markdown::{to_html_with_options, CompileOptions, Options, ParseOptions}; + /// # fn main() -> Result<(), String> { + /// + /// // With `allow_dangerous_html`, `markdown-rs` passes HTML through untouched: + /// assert_eq!( + /// to_html_with_options( + /// "