svv: svv/markup.py@13b5dd3a7a5f

from markdown import *

# Tag name assigned to the root element of the tree returned by Markup.parse()
DOC_TAG = 'root'

class Markup(object):
    """
        Custom implementation of markdown.Markdown, that supports direct etree access, and has a more limited set of
        output element types.

        <root> :
            <p> :
                text

            <h1>/<h2>/<h3>/.. :
                text

            <ul>/<ol> :
                <li> :
                    text/<p>
                    <p>
                    ...

        text :
            Currently no inline markup yet, just pure text
    """

    def __init__(self):
        """
            Setup parser.

            Builds the block parser with a restricted set of block processors, an empty set of inline patterns, and
            a single "inline" tree processor. Processors are registered in the order listed below.
        """

        ## Block parsing
        self.parser = blockparser.BlockParser()

        # internal block parsing, doesn't generate any elements
        self.parser.blockprocessors['empty'] = blockprocessors.EmptyBlockProcessor(self.parser)

        # nested ol/ul and li
        self.parser.blockprocessors['indent'] = blockprocessors.ListIndentProcessor(self.parser)

        # h1,h2,h3 etc
        self.parser.blockprocessors['hashheader'] = blockprocessors.HashHeaderProcessor(self.parser)
        self.parser.blockprocessors['setextheader'] = blockprocessors.SetextHeaderProcessor(self.parser)

        # ol/ul
        self.parser.blockprocessors['olist'] = blockprocessors.OListProcessor(self.parser)
        self.parser.blockprocessors['ulist'] = blockprocessors.UListProcessor(self.parser)

        # remaining things as paragraphs
        self.parser.blockprocessors['paragraph'] = blockprocessors.ParagraphProcessor(self.parser)

        ## Inline patterns
        self.inlinePatterns = odict.OrderedDict()

        # XXX: none for now

        ## Tree processors
        # the inline processor is handed this object in place of a markdown.Markdown instance
        self.treeprocessors = odict.OrderedDict()
        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)

        # No postprocessors; we don't generate HTML

    def _normalize_input(self, source):
        """
            Normalize given input before processing.

                source      - the raw input text

            Returns the normalized text:
                * STX/ETX characters removed
                * "\\r\\n" and "\\r" line endings converted to "\\n", with "\\n\\n" appended
                * whitespace-only runs between two newlines collapsed to a single blank line
                * tabs expanded using TAB_LENGTH
        """

        # NOTE(review): STX/ETX presumably are the markdown package's internal placeholder delimiters, which
        # must not occur in user input - confirm against the markdown version in use
        source = source.replace(STX, "").replace(ETX, "")

        # unify line endings, and make sure the text ends with a blank line
        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"

        # \s also matches newlines, so this collapses longer runs of blank lines as well
        source = re.sub(r'\n\s+\n', '\n\n', source)

        source = source.expandtabs(TAB_LENGTH)

        return source

    def parse(self, text):
        """
            Parse the given plaintext markup, yielding an etree.Element(DOC_TAG)

                text        - the unicode input

            Returns the root element of the parsed tree, with its tag set to DOC_TAG.
        """

        # normalize
        text = self._normalize_input(text)

        # as lines
        lines = text.split("\n")

        # parse
        root = self.parser.parseDocument(lines).getroot()

        # process tree
        for treeprocessor in self.treeprocessors.values():
            new_root = treeprocessor.run(root)

            # Compare against None explicitly: an etree Element without children is falsy, so a plain truth
            # test would discard a replacement root that happens to be empty.
            if new_root is not None:
                root = new_root

        # fix up the root
        root.tag = DOC_TAG

        # ok
        return root
author	Tero Marttila <terom@fixme.fi>
	Fri, 07 Jan 2011 03:19:37 +0200
changeset 35	13b5dd3a7a5f
parent 30	97d5d37333d2
permissions	-rw-r--r--