from markdown import *
# Tag name given to the root element of a parsed document
DOC_TAG = "root"
class Markup(object):
    """
    Custom implementation of markdown.Markdown that supports direct etree
    access and has a more limited set of output element types.

    The generated tree has the following shape::

        <root> :
            <p> :
                text

            <h1>/<h2>/<h3>/.. :
                text

            <ul>/<ol> :
                <li> :
                    text/<p>
                    <p>
                    ...

        text :
            Currently no inline markup, just pure text
    """

    def __init__(self):
        """
        Set up the block parser, the (empty) inline pattern table and the
        tree processors.

        Block processors are registered in the order they are assigned
        below.
        """

        ## Block parsing
        self.parser = blockparser.BlockParser()

        # internal block parsing, doesn't generate any elements
        self.parser.blockprocessors['empty'] = blockprocessors.EmptyBlockProcessor(self.parser)

        # nested ol/ul and li
        self.parser.blockprocessors['indent'] = blockprocessors.ListIndentProcessor(self.parser)

        # h1, h2, h3 etc
        self.parser.blockprocessors['hashheader'] = blockprocessors.HashHeaderProcessor(self.parser)
        self.parser.blockprocessors['setextheader'] = blockprocessors.SetextHeaderProcessor(self.parser)

        # ol/ul
        self.parser.blockprocessors['olist'] = blockprocessors.OListProcessor(self.parser)
        self.parser.blockprocessors['ulist'] = blockprocessors.UListProcessor(self.parser)

        # remaining things as paragraphs
        self.parser.blockprocessors['paragraph'] = blockprocessors.ParagraphProcessor(self.parser)

        ## Inline patterns
        # XXX: none for now
        self.inlinePatterns = odict.OrderedDict()

        ## Tree processors
        # The inline processor is handed this object itself.
        self.treeprocessors = odict.OrderedDict()
        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)

        # No postprocessors; we don't generate HTML

    def _normalize_input(self, source):
        """
        Normalize the given input before processing.

            source  - the raw input text

        Returns the text with STX/ETX markers removed, line endings
        unified to "\\n", two trailing newlines appended,
        whitespace-only lines emptied and tabs expanded to TAB_LENGTH
        columns.
        """

        # drop the STX/ETX marker characters from the input
        source = source.replace(STX, "").replace(ETX, "")

        # unify line endings and make sure the text ends with a blank line
        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"

        # lines containing only whitespace become truly empty
        source = re.sub(r'\n\s+\n', '\n\n', source)

        source = source.expandtabs(TAB_LENGTH)

        return source

    def parse(self, text):
        """
        Parse the given plaintext markup and return the root element of
        the resulting tree, with its tag set to DOC_TAG.

            text    - the unicode input
        """

        # normalize
        text = self._normalize_input(text)

        # as lines
        lines = text.split("\n")

        # parse
        root = self.parser.parseDocument(lines).getroot()

        # process tree
        for treeprocessor in self.treeprocessors.values():
            new_root = treeprocessor.run(root)

            # Compare against None explicitly: an etree element without
            # children evaluates as false, so a plain truth test would
            # silently drop an empty replacement root.
            if new_root is not None:
                root = new_root

        # fix up the root
        root.tag = DOC_TAG

        # ok
        return root