markup: Implement a Markup class using python-markdown to parse a simplified variant of markdown into a document tree
author Tero Marttila <terom@fixme.fi>
Fri, 07 Jan 2011 01:22:52 +0200
changeset 30 97d5d37333d2
parent 29 9c7ddcaa2e90
child 31 e1b63e4d10f4
markup: Implement a Markup class using python-markdown to parse a simplified variant of markdown into a document tree
svv/markup.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/svv/markup.py	Fri Jan 07 01:22:52 2011 +0200
@@ -0,0 +1,105 @@
+from markdown import *
+
+# tag name given to the root element of the tree returned by Markup.parse()
+DOC_TAG = 'root'
+
+class Markup (object) :
+    """
+        Custom stand-in for markdown.Markdown that exposes the parsed element tree directly and produces a more limited set of element types.
+
+        <root> :
+            <p> :
+                text
+
+            <h1>/<h2>/<h3>/.. :
+                text
+
+            <ul>/<ol> :
+                <li> :
+                    text/<p>
+                    <p>
+                    ...
+
+        text :
+            No inline patterns are registered yet, so there is no inline markup, just pure text
+    """
+
+    def __init__ (self) :
+        """
+            Set up the block parser, the (still empty) inline pattern table and the tree processors.
+        """
+
+        ## Block parsing
+        self.parser = blockparser.BlockParser()
+
+        # internal block parsing, doesn't generate any elements
+        self.parser.blockprocessors['empty'] = blockprocessors.EmptyBlockProcessor(self.parser)
+
+        # nested ol/ul and li
+        self.parser.blockprocessors['indent'] = blockprocessors.ListIndentProcessor(self.parser)
+
+        # h1,h2,h3 etc
+        self.parser.blockprocessors['hashheader'] = blockprocessors.HashHeaderProcessor(self.parser)
+        self.parser.blockprocessors['setextheader'] = blockprocessors.SetextHeaderProcessor(self.parser)
+
+        # ol/ul
+        self.parser.blockprocessors['olist'] = blockprocessors.OListProcessor(self.parser)
+        self.parser.blockprocessors['ulist'] = blockprocessors.UListProcessor(self.parser)
+
+        # remaining things as paragraphs
+        self.parser.blockprocessors['paragraph'] = blockprocessors.ParagraphProcessor(self.parser)
+
+        ## Inline patterns
+        self.inlinePatterns = odict.OrderedDict()
+
+        # XXX: none for now
+
+        ## Tree processors
+        self.treeprocessors = odict.OrderedDict()
+        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
+
+        # No postprocessors; we don't generate HTML
+
+    def _normalize_input (self, source) :
+        """
+            Return source with STX/ETX characters removed, line endings unified to LF, whitespace-only gaps emptied and tabs expanded.
+        """
+
+        # drop the STX/ETX marker characters, then unify CRLF/CR line endings to LF and append two newlines
+        source = source.replace(STX, "").replace(ETX, "")
+        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+        source = re.sub(r'\n\s+\n', '\n\n', source)
+        source = source.expandtabs(TAB_LENGTH)
+
+        return source
+
+    def parse (self, text) :
+        """
+            Parse the given plaintext markup and return the root element of the resulting tree, retagged as DOC_TAG.
+
+                text        - the unicode input
+        """
+
+        # normalize
+        text = self._normalize_input(text)
+
+        # as lines
+        lines = text.split("\n")
+
+        # parse
+        root = self.parser.parseDocument(lines).getroot()
+
+        # process tree
+        for treeprocessor in self.treeprocessors.values() :
+            newRoot = treeprocessor.run(root)
+            # a processor may return a replacement root; compare against None, as an element without children is falsy
+            if newRoot is not None :
+                root = newRoot
+
+        # fix up the root
+        root.tag = DOC_TAG
+
+        # ok
+        return root
+
+