author    Titus Wormer <tituswormer@gmail.com>    2022-08-26 13:29:10 +0200
committer Titus Wormer <tituswormer@gmail.com>    2022-08-26 13:29:40 +0200
commit    670f1d82e01ea2394b21d7d1857f41bdc67b3fce (patch)
tree      d38fd96745b443dc5ece52c771fa5e39653937c4 /src/construct/code_fenced.rs
parent    f41688c067be261279804b8ab3e04cd5d67f492f (diff)
Add support for math (flow)
Diffstat (limited to 'src/construct/code_fenced.rs')
-rw-r--r--    src/construct/code_fenced.rs    569
1 file changed, 0 insertions, 569 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
deleted file mode 100644
index edb2d93..0000000
--- a/src/construct/code_fenced.rs
+++ /dev/null
@@ -1,569 +0,0 @@
-//! Code (fenced) occurs in the [flow][] content type.
-//!
-//! ## Grammar
-//!
-//! Code (fenced) forms with the following BNF
-//! (<small>see [construct][crate::construct] for character groups</small>):
-//!
-//! ```bnf
-//! code_fenced ::= fence_open *( eol *byte ) [ eol fence_close ]
-//!
-//! fence_open ::= sequence [ 1*space_or_tab info [ 1*space_or_tab meta ] ] *space_or_tab
-//! ; Restriction: the number of markers in the closing fence sequence must be
-//! ; equal to or greater than the number of markers in the opening fence
-//! ; sequence.
-//! ; Restriction: the marker in the closing fence sequence must match the
-//! ; marker in the opening fence sequence.
-//! fence_close ::= sequence *space_or_tab
-//! sequence ::= 3*'`' | 3*'~'
-//! ; Restriction: the `` ` `` character cannot occur in `info` if it is the marker.
-//! info ::= 1*text
-//! ; Restriction: the `` ` `` character cannot occur in `meta` if it is the marker.
-//! meta ::= 1*text *( *space_or_tab 1*text )
-//! ```
-//!
-//! As this construct occurs in flow, like all flow constructs, it must be
-//! followed by an eol (line ending) or eof (end of file).
-//!
-//! The above grammar does not show how indentation (with `space_or_tab`) of
-//! each line is handled.
-//! To parse code (fenced), let `x` be the number of `space_or_tab` characters
-//! before the opening fence sequence.
-//! Each line of text is then allowed (not required) to be indented with up
-//! to `x` spaces or tabs, which are then ignored as an indent instead of being
-//! considered as part of the code.
-//! This indent does not affect the closing fence.
-//! The closing fence can itself be indented by up to 3 spaces or tabs,
-//! independent of the indent of the opening fence.
-//! A bigger indent makes that line part of the code instead of a closing
-//! fence.
-//!
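-//! For example, with the opening fence indented by 2 spaces, up to 2 spaces
-//! are stripped from each content line, while the closing fence uses its own
-//! indent:
-//!
-//! ```markdown
-//!   ~~~js
-//!     console.log(1)
-//! ~~~
-//! ```
-//!
-//! Yields:
-//!
-//! ```html
-//! <pre><code class="language-js">  console.log(1)
-//! </code></pre>
-//! ```
-//!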
-//! The `info` and `meta` parts are interpreted as the [string][] content type.
-//! That means that [character escapes][character_escape] and
-//! [character references][character_reference] are allowed.
-//!
-//! The optional `meta` part is ignored: it is not used when parsing or
-//! rendering.
-//!
-//! The optional `info` part is used and is expected to specify the programming
-//! language that the code is in.
-//! Which value it holds depends on what your syntax highlighter supports, if
-//! one is used.
-//!
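-//! For example (the `linenumbers` value here is an arbitrary placeholder),
-//! the meta part does not show up in the output, while the info part does:
-//!
-//! ```markdown
-//! ~~~js linenumbers
-//! console.log(1)
-//! ~~~
-//! ```
-//!
-//! Yields:
-//!
-//! ```html
-//! <pre><code class="language-js">console.log(1)
-//! </code></pre>
-//! ```
-//!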
-//! In markdown, it is also possible to use [code (text)][raw_text] in the
-//! [text][] content type.
-//! It is also possible to create code with the
-//! [code (indented)][code_indented] construct.
-//!
-//! ## HTML
-//!
-//! Code (fenced) relates to both the `<pre>` and the `<code>` elements in
-//! HTML.
-//! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code`
-//! element*][html_code] in the HTML spec for more info.
-//!
-//! The `info` is, when rendering to HTML, typically exposed as a class.
-//! This behavior stems from the HTML spec ([*§ 4.5.15 The `code`
-//! element*][html_code]).
-//! For example:
-//!
-//! ```markdown
-//! ~~~css
-//! * { color: tomato }
-//! ~~~
-//! ```
-//!
-//! Yields:
-//!
-//! ```html
-//! <pre><code class="language-css">* { color: tomato }
-//! </code></pre>
-//! ```
-//!
-//! ## Recommendation
-//!
-//! It is recommended to use code (fenced) instead of code (indented).
-//! Code (fenced) is more explicit, similar to code (text), and has support
-//! for specifying the programming language.
-//!
-//! ## Tokens
-//!
-//! * [`CodeFenced`][Name::CodeFenced]
-//! * [`CodeFencedFence`][Name::CodeFencedFence]
-//! * [`CodeFencedFenceInfo`][Name::CodeFencedFenceInfo]
-//! * [`CodeFencedFenceMeta`][Name::CodeFencedFenceMeta]
-//! * [`CodeFencedFenceSequence`][Name::CodeFencedFenceSequence]
-//! * [`CodeFlowChunk`][Name::CodeFlowChunk]
-//! * [`LineEnding`][Name::LineEnding]
-//! * [`SpaceOrTab`][Name::SpaceOrTab]
-//!
-//! ## References
-//!
-//! * [`code-fenced.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js)
-//! * [*§ 4.5 Fenced code blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#fenced-code-blocks)
-//!
-//! [flow]: crate::construct::flow
-//! [string]: crate::construct::string
-//! [text]: crate::construct::text
-//! [character_escape]: crate::construct::character_escape
-//! [character_reference]: crate::construct::character_reference
-//! [code_indented]: crate::construct::code_indented
-//! [raw_text]: crate::construct::raw_text
-//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
-//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
-
-use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
-use crate::event::{Content, Link, Name};
-use crate::state::{Name as StateName, State};
-use crate::tokenizer::Tokenizer;
-use crate::util::{
-    constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE},
-    slice::{Position, Slice},
-};
-
-/// Start of fenced code.
-///
-/// ```markdown
-/// > | ~~~js
-///     ^
-///   | console.log(1)
-///   | ~~~
-/// ```
-pub fn start(tokenizer: &mut Tokenizer) -> State {
-    if tokenizer.parse_state.options.constructs.code_fenced {
-        if matches!(tokenizer.current, Some(b'\t' | b' ')) {
-            tokenizer.enter(Name::CodeFenced);
-            tokenizer.enter(Name::CodeFencedFence);
-            tokenizer.attempt(
-                State::Next(StateName::CodeFencedBeforeSequenceOpen),
-                State::Nok,
-            );
-            return State::Retry(space_or_tab_min_max(
-                tokenizer,
-                0,
-                if tokenizer.parse_state.options.constructs.code_indented {
-                    TAB_SIZE - 1
-                } else {
-                    usize::MAX
-                },
-            ));
-        }
-
-        if matches!(tokenizer.current, Some(b'`' | b'~')) {
-            tokenizer.enter(Name::CodeFenced);
-            tokenizer.enter(Name::CodeFencedFence);
-            return State::Retry(StateName::CodeFencedBeforeSequenceOpen);
-        }
-    }
-
-    State::Nok
-}
-
-/// In opening fence, after prefix, at sequence.
-///
-/// ```markdown
-/// > | ~~~js
-///     ^
-///   | console.log(1)
-///   | ~~~
-/// ```
-pub fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
-    let tail = tokenizer.events.last();
-    let mut prefix = 0;
-
-    if let Some(event) = tail {
-        if event.name == Name::SpaceOrTab {
-            prefix = Slice::from_position(
-                tokenizer.parse_state.bytes,
-                &Position::from_exit_event(&tokenizer.events, tokenizer.events.len() - 1),
-            )
-            .len();
-        }
-    }
-
-    if let Some(b'`' | b'~') = tokenizer.current {
-        tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
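-        // Store the indent before the opening fence: content lines may later
-        // be stripped of up to this many `space_or_tab`s.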
-        tokenizer.tokenize_state.size_c = prefix;
-        tokenizer.enter(Name::CodeFencedFenceSequence);
-        State::Retry(StateName::CodeFencedSequenceOpen)
-    } else {
-        State::Nok
-    }
-}
-
-/// In opening fence sequence.
-///
-/// ```markdown
-/// > | ~~~js
-///     ^
-///   | console.log(1)
-///   | ~~~
-/// ```
-pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
-    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
-        tokenizer.tokenize_state.size += 1;
-        tokenizer.consume();
-        State::Next(StateName::CodeFencedSequenceOpen)
-    } else if tokenizer.tokenize_state.size < CODE_FENCED_SEQUENCE_SIZE_MIN {
-        tokenizer.tokenize_state.marker = 0;
-        tokenizer.tokenize_state.size_c = 0;
-        tokenizer.tokenize_state.size = 0;
-        State::Nok
-    } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
-        tokenizer.exit(Name::CodeFencedFenceSequence);
-        tokenizer.attempt(State::Next(StateName::CodeFencedInfoBefore), State::Nok);
-        State::Retry(space_or_tab(tokenizer))
-    } else {
-        tokenizer.exit(Name::CodeFencedFenceSequence);
-        State::Retry(StateName::CodeFencedInfoBefore)
-    }
-}
-
-/// In opening fence, after the sequence (and optional whitespace), before info.
-///
-/// ```markdown
-/// > | ~~~js
-///        ^
-///   | console.log(1)
-///   | ~~~
-/// ```
-pub fn info_before(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => {
-            tokenizer.exit(Name::CodeFencedFence);
-            // Do not form containers.
-            tokenizer.concrete = true;
-            tokenizer.check(
-                State::Next(StateName::CodeFencedAtNonLazyBreak),
-                State::Next(StateName::CodeFencedAfter),
-            );
-            State::Retry(StateName::NonLazyContinuationStart)
-        }
-        _ => {
-            tokenizer.enter(Name::CodeFencedFenceInfo);
-            tokenizer.enter_link(
-                Name::Data,
-                Link {
-                    previous: None,
-                    next: None,
-                    content: Content::String,
-                },
-            );
-            State::Retry(StateName::CodeFencedInfo)
-        }
-    }
-}
-
-/// In info.
-///
-/// ```markdown
-/// > | ~~~js
-///        ^
-///   | console.log(1)
-///   | ~~~
-/// ```
-pub fn info(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => {
-            tokenizer.exit(Name::Data);
-            tokenizer.exit(Name::CodeFencedFenceInfo);
-            State::Retry(StateName::CodeFencedInfoBefore)
-        }
-        Some(b'\t' | b' ') => {
-            tokenizer.exit(Name::Data);
-            tokenizer.exit(Name::CodeFencedFenceInfo);
-            tokenizer.attempt(State::Next(StateName::CodeFencedMetaBefore), State::Nok);
-            State::Retry(space_or_tab(tokenizer))
-        }
-        Some(byte) => {
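-            // The info cannot contain the marker if the marker is a grave
-            // accent (backtick).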
-            if tokenizer.tokenize_state.marker == byte && byte == b'`' {
-                tokenizer.concrete = false;
-                tokenizer.tokenize_state.marker = 0;
-                tokenizer.tokenize_state.size_c = 0;
-                tokenizer.tokenize_state.size = 0;
-                State::Nok
-            } else {
-                tokenizer.consume();
-                State::Next(StateName::CodeFencedInfo)
-            }
-        }
-    }
-}
-
-/// In opening fence, after info and whitespace, before meta.
-///
-/// ```markdown
-/// > | ~~~js eval
-///           ^
-///   | console.log(1)
-///   | ~~~
-/// ```
-pub fn meta_before(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => State::Retry(StateName::CodeFencedInfoBefore),
-        _ => {
-            tokenizer.enter(Name::CodeFencedFenceMeta);
-            tokenizer.enter_link(
-                Name::Data,
-                Link {
-                    previous: None,
-                    next: None,
-                    content: Content::String,
-                },
-            );
-            State::Retry(StateName::CodeFencedMeta)
-        }
-    }
-}
-
-/// In meta.
-///
-/// ```markdown
-/// > | ~~~js eval
-///           ^
-///   | console.log(1)
-///   | ~~~
-/// ```
-pub fn meta(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => {
-            tokenizer.exit(Name::Data);
-            tokenizer.exit(Name::CodeFencedFenceMeta);
-            State::Retry(StateName::CodeFencedInfoBefore)
-        }
-        Some(byte) => {
-            if tokenizer.tokenize_state.marker == byte && byte == b'`' {
-                tokenizer.concrete = false;
-                tokenizer.tokenize_state.marker = 0;
-                tokenizer.tokenize_state.size_c = 0;
-                tokenizer.tokenize_state.size = 0;
-                State::Nok
-            } else {
-                tokenizer.consume();
-                State::Next(StateName::CodeFencedMeta)
-            }
-        }
-    }
-}
-
-/// At eol/eof in code, before a non-lazy closing fence or content.
-///
-/// ```markdown
-/// > | ~~~js
-///          ^
-/// > | console.log(1)
-///                   ^
-///   | ~~~
-/// ```
-pub fn at_non_lazy_break(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.attempt(
-        State::Next(StateName::CodeFencedAfter),
-        State::Next(StateName::CodeFencedContentBefore),
-    );
-    tokenizer.enter(Name::LineEnding);
-    tokenizer.consume();
-    tokenizer.exit(Name::LineEnding);
-    State::Next(StateName::CodeFencedCloseStart)
-}
-
-/// Before closing fence, at optional whitespace.
-///
-/// ```markdown
-///   | ~~~js
-///   | console.log(1)
-/// > | ~~~
-///     ^
-/// ```
-pub fn close_start(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.enter(Name::CodeFencedFence);
-
-    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
-        tokenizer.attempt(
-            State::Next(StateName::CodeFencedBeforeSequenceClose),
-            State::Nok,
-        );
-
-        State::Retry(space_or_tab_min_max(
-            tokenizer,
-            0,
-            if tokenizer.parse_state.options.constructs.code_indented {
-                TAB_SIZE - 1
-            } else {
-                usize::MAX
-            },
-        ))
-    } else {
-        State::Retry(StateName::CodeFencedBeforeSequenceClose)
-    }
-}
-
-/// In closing fence, after optional whitespace, at sequence.
-///
-/// ```markdown
-///   | ~~~js
-///   | console.log(1)
-/// > | ~~~
-///     ^
-/// ```
-pub fn before_sequence_close(tokenizer: &mut Tokenizer) -> State {
-    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
-        tokenizer.enter(Name::CodeFencedFenceSequence);
-        State::Retry(StateName::CodeFencedSequenceClose)
-    } else {
-        State::Nok
-    }
-}
-
-/// In closing fence sequence.
-///
-/// ```markdown
-///   | ~~~js
-///   | console.log(1)
-/// > | ~~~
-///     ^
-/// ```
-pub fn sequence_close(tokenizer: &mut Tokenizer) -> State {
-    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
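-        // Count the closing sequence: it must be at least as long as the
-        // opening sequence.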
-        tokenizer.tokenize_state.size_b += 1;
-        tokenizer.consume();
-        State::Next(StateName::CodeFencedSequenceClose)
-    } else if tokenizer.tokenize_state.size_b >= CODE_FENCED_SEQUENCE_SIZE_MIN
-        && tokenizer.tokenize_state.size_b >= tokenizer.tokenize_state.size
-    {
-        tokenizer.tokenize_state.size_b = 0;
-        tokenizer.exit(Name::CodeFencedFenceSequence);
-
-        if matches!(tokenizer.current, Some(b'\t' | b' ')) {
-            tokenizer.attempt(
-                State::Next(StateName::CodeFencedAfterSequenceClose),
-                State::Nok,
-            );
-            State::Retry(space_or_tab(tokenizer))
-        } else {
-            State::Retry(StateName::CodeFencedAfterSequenceClose)
-        }
-    } else {
-        tokenizer.tokenize_state.size_b = 0;
-        State::Nok
-    }
-}
-
-/// After closing fence sequence, after optional whitespace.
-///
-/// ```markdown
-///   | ~~~js
-///   | console.log(1)
-/// > | ~~~
-///        ^
-/// ```
-pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => {
-            tokenizer.exit(Name::CodeFencedFence);
-            State::Ok
-        }
-        _ => State::Nok,
-    }
-}
-
-/// Before content, at eol; the next line is not a closing fence.
-///
-/// ```markdown
-///   | ~~~js
-/// > | console.log(1)
-///     ^
-///   | ~~~
-/// ```
-pub fn content_before(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.enter(Name::LineEnding);
-    tokenizer.consume();
-    tokenizer.exit(Name::LineEnding);
-    State::Next(StateName::CodeFencedContentStart)
-}
-
-/// Before code content, definitely not before a closing fence.
-///
-/// ```markdown
-///   | ~~~js
-/// > | console.log(1)
-///     ^
-///   | ~~~
-/// ```
-pub fn content_start(tokenizer: &mut Tokenizer) -> State {
-    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
-        tokenizer.attempt(
-            State::Next(StateName::CodeFencedBeforeContentChunk),
-            State::Nok,
-        );
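-        // Strip at most the indent of the opening fence (`size_c`) from this
-        // content line.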
-        State::Retry(space_or_tab_min_max(
-            tokenizer,
-            0,
-            tokenizer.tokenize_state.size_c,
-        ))
-    } else {
-        State::Retry(StateName::CodeFencedBeforeContentChunk)
-    }
-}
-
-/// Before code content, after optional prefix.
-///
-/// ```markdown
-///   | ~~~js
-/// > | console.log(1)
-///     ^
-///   | ~~~
-/// ```
-pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => {
-            tokenizer.check(
-                State::Next(StateName::CodeFencedAtNonLazyBreak),
-                State::Next(StateName::CodeFencedAfter),
-            );
-            State::Retry(StateName::NonLazyContinuationStart)
-        }
-        _ => {
-            tokenizer.enter(Name::CodeFlowChunk);
-            State::Retry(StateName::CodeFencedContentChunk)
-        }
-    }
-}
-
-/// In code content.
-///
-/// ```markdown
-///   | ~~~js
-/// > | console.log(1)
-///     ^^^^^^^^^^^^^^
-///   | ~~~
-/// ```
-pub fn content_chunk(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        None | Some(b'\n') => {
-            tokenizer.exit(Name::CodeFlowChunk);
-            State::Retry(StateName::CodeFencedBeforeContentChunk)
-        }
-        _ => {
-            tokenizer.consume();
-            State::Next(StateName::CodeFencedContentChunk)
-        }
-    }
-}
-
-/// After fenced code.
-///
-/// ```markdown
-///   | ~~~js
-///   | console.log(1)
-/// > | ~~~
-///     ^
-/// ```
-pub fn after(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.exit(Name::CodeFenced);
-    tokenizer.tokenize_state.marker = 0;
-    tokenizer.tokenize_state.size_c = 0;
-    tokenizer.tokenize_state.size = 0;
-    // Feel free to interrupt.
-    tokenizer.interrupt = false;
-    // No longer concrete.
-    tokenizer.concrete = false;
-    State::Ok
-}