markup: Implement a Markup class using python-markdown to parse a simplified variant of markdown into a document tree
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/svv/markup.py Fri Jan 07 01:22:52 2011 +0200
@@ -0,0 +1,105 @@
+from markdown import *
+
+# Tag name assigned to the root element of the tree returned by Markup.parse
+DOC_TAG = 'root'
+
+class Markup (object) :
+    """
+        Custom implementation of markdown.Markdown that supports direct etree access and has a more limited set of output element types.
+
+        <root> :
+            <p> :
+                text
+
+            <h1>/<h2>/<h3>/.. :
+                text
+
+            <ul>/<ol> :
+                <li> :
+                    text/<p>
+                    <p>
+                    ...
+
+        text :
+            No inline markup yet, just pure text (no inline patterns are registered)
+    """
+
+    def __init__ (self) :
+        """
+            Set up the block parser, the (currently empty) inline pattern table and the tree processors.
+        """
+
+        ## Block parsing
+        self.parser = blockparser.BlockParser()
+
+        # internal block parsing, doesn't generate any elements
+        self.parser.blockprocessors['empty'] = blockprocessors.EmptyBlockProcessor(self.parser)
+
+        # nested ol/ul and li
+        self.parser.blockprocessors['indent'] = blockprocessors.ListIndentProcessor(self.parser)
+
+        # h1,h2,h3 etc
+        self.parser.blockprocessors['hashheader'] = blockprocessors.HashHeaderProcessor(self.parser)
+        self.parser.blockprocessors['setextheader'] = blockprocessors.SetextHeaderProcessor(self.parser)
+
+        # ol/ul
+        self.parser.blockprocessors['olist'] = blockprocessors.OListProcessor(self.parser)
+        self.parser.blockprocessors['ulist'] = blockprocessors.UListProcessor(self.parser)
+
+        # remaining things as paragraphs
+        self.parser.blockprocessors['paragraph'] = blockprocessors.ParagraphProcessor(self.parser)
+
+        ## Inline patterns
+        self.inlinePatterns = odict.OrderedDict()
+
+        # XXX: none for now
+
+        ## Tree processors (InlineProcessor is handed this object in place of a markdown.Markdown instance)
+        self.treeprocessors = odict.OrderedDict()
+        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
+
+        # No postprocessors; we don't generate HTML
+
+    def _normalize_input (self, source) :
+        """
+            Normalize given input before processing.
+            Drops STX/ETX characters, unifies line endings to LF (appending two), blanks whitespace-only lines, expands tabs.
+        """
+
+        source = source.replace(STX, "").replace(ETX, "")
+        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+        source = re.sub(r'\n\s+\n', '\n\n', source)
+        source = source.expandtabs(TAB_LENGTH)
+
+        return source
+
+    def parse (self, text) :
+        """
+            Parse the given plaintext markup, yielding an etree.Element(DOC_TAG)
+
+                text - the unicode input
+            Returns the root element, after all tree processors have run and its tag has been set to DOC_TAG.
+        """
+        # normalize
+        text = self._normalize_input(text)
+
+        # as lines
+        lines = text.split("\n")
+
+        # parse
+        root = self.parser.parseDocument(lines).getroot()
+
+        # process tree
+        for treeprocessor in self.treeprocessors.values() :
+            newRoot = treeprocessor.run(root)
+            # compare against None: an Element without children is falsy, but is still a valid replacement root
+            if newRoot is not None :
+                root = newRoot
+
+        # fix up the root
+        root.tag = DOC_TAG
+
+        # ok
+        return root
+
+