How to use the markdown.preprocessors.Preprocessor class in Python-Markdown

To help you get started, we’ve selected a few Markdown examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dragondjf / QMarkdowner / markdown / preprocessors.py View on Github external
pass


class NormalizeWhitespace(Preprocessor):
    """Preprocessor that normalizes whitespace for consistent parsing."""

    def run(self, lines):
        """Return `lines` with markers, line endings, tabs and blank lines normalized.

        The lines are joined into one string, every occurrence of
        ``util.STX`` and ``util.ETX`` is deleted, ``\\r\\n`` and bare ``\\r``
        become ``\\n``, two trailing newlines are appended, tabs are expanded
        to ``self.markdown.tab_length`` columns, and any line made up solely
        of spaces is emptied. The result is split back into a list of lines.
        """
        text = '\n'.join(lines)

        # Drop both marker characters wherever they occur in the input.
        for marker in (util.STX, util.ETX):
            text = text.replace(marker, "")

        # Unify line endings, then make sure the document ends in a blank line.
        text = text.replace("\r\n", "\n").replace("\r", "\n")
        text += "\n\n"

        text = text.expandtabs(self.markdown.tab_length)

        # A run of spaces that fills a whole line collapses to an empty line.
        text = re.sub(r'(?<=\n) +\n', '\n', text)
        return text.split('\n')


class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval."""

    right_tag_patterns = ["", "%s>"]
    attrs_pattern = r"""
        \s+(?P[^&gt;"'/= ]+)=(?P<q>['"])(?P.*?)(?P=q)   # attr="value"
        |                                                         # OR 
        \s+(?P[^&gt;"'/= ]+)=(?P[^&gt; ]+)               # attr=value
        |                                                         # OR
        \s+(?P[^&gt;"'/= ]+)                                  # attr
        """
    left_tag_pattern = r'^\&lt;(?P[^&gt; ]+)(?P(%s)*)\s*\/?\&gt;?' % attrs_pattern
    attrs_re = re.compile(attrs_pattern, re.VERBOSE)
    left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
    markdown_in_raw = False

    def _get_left_tag(self, block):</q>
github facelessuser / pymdown-extensions / pymdownx / _bypassnorm.py View on Github external
# Alternate marker characters handled by the two preprocessors below:
# PreNormalizePreprocessor deletes them from the input, and
# PostNormalizePreprocessor replaces SOH with STX and EOT with ETX.
SOH = '\u0001'  # start marker stand-in (U+0001)
EOT = '\u0004'  # end marker stand-in (U+0004)


class PreNormalizePreprocessor(Preprocessor):
    """Preprocessor that strips the workaround marker characters."""

    def run(self, lines):
        """Return `lines` with every SOH and EOT character deleted.

        The lines are joined with newlines, both marker characters are
        removed, and the text is split back into a list of lines.
        """
        text = '\n'.join(lines)
        for marker in (SOH, EOT):
            text = text.replace(marker, '')
        return text.split('\n')


class PostNormalizePreprocessor(Preprocessor):
    """Preprocessor that finishes the normalization bypass workaround."""

    def run(self, lines):
        """Return `lines` with SOH replaced by STX and EOT replaced by ETX.

        The lines are joined with newlines, the two substitutions are applied
        in that order, and the text is split back into a list of lines.
        """
        joined = '\n'.join(lines)
        swapped = joined.replace(SOH, STX)
        swapped = swapped.replace(EOT, ETX)
        return swapped.split('\n')


class BypassNormExtension(Extension):
    """Bypass whitespace normalization."""

    def __init__(self, *args, **kwargs):
        """Initialize."""
github airbnb / knowledge-repo / knowledge_repo / converters / html.py View on Github external
# Generate HTML element for new span
            el = etree.Element('span')
            el.text = text
            if id:
                el.attrib['id'] = id
            if class_names:
                el.attrib['class'] = " ".join(class_names)
            return el

    def extendMarkdown(self, md, md_globals):
        """Store a span handler in ``md.inlinePatterns`` under 'inline_span'.

        The handler is ``self.SpanMatchHandler`` constructed with
        ``self.SPAN_PATTERN``. ``md_globals`` is accepted but not used.
        """
        md.inlinePatterns['inline_span'] = self.SpanMatchHandler(self.SPAN_PATTERN)


class IndentsAsCellOutputPreprocessor(Preprocessor):
    """
    Ensure all indented blocks are followed by a blank line to allow html
    preprocessors to extract html elements (like scripts) properly.
    """

    def run(self, lines):
        in_block = False
        block_startable = True
        for i, line in enumerate(lines):
            if not line.startswith(' ' * self.markdown.tab_length):
                if in_block:
                    if line != "":
                        lines.insert(i, "")
                    in_block = False
                else:
                    block_startable = True if line == "" else False
github PageBot / PageBot / Lib / pagebot / contributions / markdown / footnotes.py View on Github external
(self.footnotes.index(identifier)+1)
            )
            backlink.text = FN_BACKLINK_TEXT

            if list(li):
                node = li[-1]
                if node.tag == "p":
                    node.text = node.text + NBSP_PLACEHOLDER
                    node.append(backlink)
                else:
                    p = etree.SubElement(li, "p")
                    p.append(backlink)
        return div


class FootnotePreprocessor(Preprocessor):
    """ Find all footnote references and store for later use. """

    def __init__(self, footnotes):
        super().__init__()
        self.footnotes = footnotes

    def run(self, lines):
        """
        Loop through lines and find, set, and remove footnote definitions.

        Keywords:

        * lines: A list of lines of text

        Return: A list of lines of text with footnote definitions removed.
github django-wiki / django-wiki / src / wiki / core / markdown / mdx / codehilite.py View on Github external
code,
        linenums=config['linenums'],
        guess_lang=config['guess_lang'],
        css_class=config['css_class'],
        style=config['pygments_style'],
        noclasses=config['noclasses'],
        tab_length=tab_length,
        use_pygments=config['use_pygments'],
        lang=lang,
    )
    html = code.hilite()
    html = """<div class="codehilite-wrap">{}</div>""".format(html)
    return html


class WikiFencedBlockPreprocessor(Preprocessor):
    """
    This is a replacement of markdown.extensions.fenced_code which will
    directly and without configuration options invoke the vanilla CodeHilite
    extension.
    """
    FENCED_BLOCK_RE = re.compile(r'''
(?P^(?:~{3,}|`{3,}))[ ]*         # Opening ``` or ~~~
(\{?\.?(?P[a-zA-Z0-9_+-]*))?[ ]*  # Optional {, and lang
# Optional highlight lines, single- or double-quote-delimited
(hl_lines=(?P"|')(?P.*?)(?P=quot))?[ ]*
}?[ ]*\n                                # Optional closing }
(?P<code>.*?)(?&lt;=\n)
(?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE)
    CODE_WRAP = '<pre>%s</pre>'

    def __init__(self, md):</code>
github rafalp / Misago / misago / markdown / extensions / quotes.py View on Github external
# Global vars
# Matches a whole (already stripped) quote-attribution line such as
# "@name" or "> > @name":
#   arrows   - optional leading run of '>' characters and whitespace
#   username - the word characters that follow '@'
# Both group names are read through at_match.group(...) by
# QuoteTitlesPreprocessor.run, so they must be present in the pattern.
QUOTE_AUTHOR_RE = re.compile(r'^(?P<arrows>(>|\s)+)?@(?P<username>(\w|\d)+)$')

class QuoteTitlesExtension(markdown.Extension):
    """Markdown extension that installs the quote-title pre/postprocessors."""

    def extendMarkdown(self, md):
        """Register this extension and its two processors on `md`.

        The preprocessor is added under the key 'mi_quote_title' with the
        location '>fenced_code_block'; the postprocessor is added under the
        same key with the location '_end'.
        """
        md.registerExtension(self)
        # The location must begin with a literal '>' (or '<'); an
        # HTML-escaped '&gt;' prefix is not a valid location string.
        md.preprocessors.add('mi_quote_title',
                             QuoteTitlesPreprocessor(md),
                             '>fenced_code_block')
        md.postprocessors.add('mi_quote_title',
                              QuoteTitlesPostprocessor(md),
                              '_end')


class QuoteTitlesPreprocessor(markdown.preprocessors.Preprocessor):
    def __init__(self, md):
        markdown.preprocessors.Preprocessor.__init__(self, md)

    def run(self, lines):
        clean = []
        for l, line in enumerate(lines):
            try:
                if line.strip():
                    at_match = QUOTE_AUTHOR_RE.match(line.strip())
                    if at_match and lines[l + 1].strip()[0] == '&gt;':
                        username = '&lt;%(token)s:quotetitle&gt;@%(name)s' % {'token': self.markdown.mi_token, 'name': at_match.group('username')}
                        if at_match.group('arrows'):
                            clean.append('&gt; %s%s' % (at_match.group('arrows'), username))
                        else:
                            clean.append('&gt; %s' % username)
                    else:
github jpfleury / gedit-markdown / plugins / markdown-preview / markdown / extensions / abbr.py View on Github external
import markdown, re
from markdown import etree

# Global Vars
# Pattern for an abbreviation definition of the form "*[abbr]: title".
#   abbr  - the text between the square brackets (may be empty)
#   title - everything after the colon and any following whitespace
# A single optional space is allowed between ']' and ':'.
ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)')

class AbbrExtension(markdown.Extension):
    """ Abbreviation Extension for Python-Markdown. """

    def extendMarkdown(self, md, md_globals):
        """ Insert AbbrPreprocessor before ReferencePreprocessor.

        The preprocessor is added to ``md.preprocessors`` under the key
        'abbr' with the location '<reference'. ``md_globals`` is accepted
        but not used.
        """
        # The location must begin with a literal '<' (or '>'); an
        # HTML-escaped '&lt;' prefix is not a valid location string.
        md.preprocessors.add('abbr', AbbrPreprocessor(md), '<reference')
        
           
class AbbrPreprocessor(markdown.preprocessors.Preprocessor):
    """ Abbreviation Preprocessor - parse text for abbr references. """

    def run(self, lines):
        '''
        Find and remove all Abbreviation references from the text.
        Each reference is set as a new AbbrPattern in the markdown instance.
        
        '''
        new_text = []
        for line in lines:
            m = ABBR_REF_RE.match(line)
            if m:
                abbr = m.group('abbr').strip()
                title = m.group('title').strip()
                self.markdown.inlinePatterns['abbr-%s'%abbr] = \
                    AbbrPattern(self._generate_pattern(abbr), title)</title></abbr>
github aishack / aishack / 3rdparty / markdown / extensions / latex.py View on Github external
import markdown


from subprocess import call, PIPE


# Defines our basic inline image
# Template for an <img> tag with four %s placeholders (base64 PNG data, id,
# alt text, and a suffix for the "math-" class name).
# NOTE(review): the unescaped double quotes inside this double-quoted literal
# end the string early, so this line is not valid Python as written. The
# attribute quoting looks altered by HTML extraction -- restore the original
# quoting (and confirm the placeholder order against the code that fills
# this template) before use.
IMG_EXPR = "<img src="data:image/png;base64,%s" id="%s" alt="%s" class="latex-inline math-%s">"


# Base CSS template
IMG_CSS = "<style>img.latex-inline { vertical-align: middle; }</style>\n"


class LaTeXPreprocessor(markdown.preprocessors.Preprocessor):
    # These are our cached expressions that are stored in latex.cache
    cached = {}

    # Basic LaTex Setup as well as our list of expressions to parse
    tex_preamble = r"""\documentclass{article}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{bm}
\usepackage[usenames,dvipsnames]{color}
\pagestyle{empty}
"""

    def __init__(self, configs):
        try:
            cache_file = open('latex.cache', 'r+')
github django-wiki / django-wiki / src / wiki / plugins / macros / mdx / macro.py View on Github external
# Pattern for one macro keyword argument: optional leading whitespace, a word,
# then optionally ':' followed by a value that is either a run of
# non-apostrophe characters or whatever `re_sq_short` matches (presumably a
# single-quoted string pattern -- confirm where it is defined).
# NOTE(review): both `(?P...` groups below are missing their `<name>` part,
# so this pattern will not compile as written; the group names appear to have
# been lost in HTML extraction and must be restored from the upstream source.
KWARG_RE = re.compile(
    r'\s*(?P\w+)(:(?P([^\']+|%s)))?' %
    re_sq_short,
    re.IGNORECASE)


class MacroExtension(markdown.Extension):

    """ Macro plugin markdown extension for django-wiki. """

    def extendMarkdown(self, md, md_globals):
        """ Insert MacroPreprocessor after the 'html_block' preprocessor.

        The preprocessor is added to ``md.preprocessors`` under the key
        'dw-macros' with the location '>html_block'. ``md_globals`` is
        accepted but not used.
        """
        # The location must begin with a literal '>' (or '<'); an
        # HTML-escaped '&gt;' prefix is not a valid location string.
        md.preprocessors.add('dw-macros', MacroPreprocessor(md), '>html_block')


class MacroPreprocessor(markdown.preprocessors.Preprocessor):

    """django-wiki macro preprocessor - parse text for various [some_macro] and
    [some_macro (kw:arg)*] references. """

    def run(self, lines):
        # Look at all those indentations.
        # That's insane, let's get a helper library
        # Please note that this pattern is also in plugins.images
        new_text = []
        for line in lines:
            m = MACRO_RE.match(line)
            if m:
                macro = m.group('macro').strip()
                if macro in settings.METHODS and hasattr(self, macro):
                    kwargs = m.group('kwargs')
                    if kwargs:
github dellsystem / wikinotes / mdx / mdx_wiki_fenced_code.py View on Github external
)
CODE_WRAP = '<pre>%s</pre>'
LANG_TAG = ' class="%s"'

class FencedCodeExtension(markdown.Extension):
    """Markdown extension that installs the fenced code block preprocessor."""

    def extendMarkdown(self, md, md_globals):
        """Register this extension and add FencedBlockPreprocessor to `md`.

        The preprocessor is added to ``md.preprocessors`` under the key
        'fenced_code_block' at the "_begin" location. ``md_globals`` is
        accepted but not used.
        """
        md.registerExtension(self)

        fenced_preprocessor = FencedBlockPreprocessor(md)
        md.preprocessors.add('fenced_code_block', fenced_preprocessor, "_begin")


class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):

    def __init__(self, md):
        markdown.preprocessors.Preprocessor.__init__(self, md)

        self.checked_for_codehilite = False
        self.codehilite_conf = {}

    def run(self, lines):
        """ Match and store Fenced Code Blocks in the HtmlStash. """

        # Check for code hilite extension
        if not self.checked_for_codehilite:
            for ext in self.markdown.registeredExtensions:
                if isinstance(ext, CodeHiliteExtension):
                    self.codehilite_conf = ext.config
                    break