"""
Generating XHTML output from nested python objects
XXX: use a 'real' XML builder?
To use:
>>> from html import tags
>>> str(tags.a(href="http://www.google.com")("Google <this>!"))
'<a href="http://www.google.com">\\n\\tGoogle &lt;this&gt;!\\n</a>\\n'
"""
from cgi import escape
import itertools as _itertools, types as _types
class IRenderable(object):
    """
    Something that's renderable as the contents of a HTML tag.

    Container checks for this type to decide whether an item renders itself as HTML lines or is a plain value
    that must be escaped as data. Every output helper below (lines, unicode, encoded str, stream, file) is
    built on top of render_raw_lines(), which is the only method a subclass has to provide.
    """

    def render_raw_lines(self, indent=u'\t'):
        """
        Render the indented lines for tag and contents, without newlines.

        Subclasses must override this; the base implementation always raises NotImplementedError.
        """
        # explicit "abstract method" error instead of relying on an undefined name blowing up
        raise NotImplementedError("%s does not implement render_raw_lines()" % (type(self).__name__, ))

    def render_lines(self, indent=u'\t', newline=u'\n'):
        """
        Render full output lines with given newlines.

        Yields each raw line with `newline` appended.

        >>> list(Tag('xx', 'yy').render_lines())
        [u'<xx>\\n', u'\\tyy\\n', u'</xx>\\n']
        """
        for line in self.render_raw_lines(indent=indent):
            yield line + newline

    def render_unicode(self, **render_opts):
        """
        Render full tag as a single unicode string.

        The keyword arguments are passed through to render_lines().

        >>> Tag('xx', 'yy').render_unicode()
        u'<xx>\\n\\tyy\\n</xx>\\n'
        """
        # join on a unicode separator so that even an empty rendering is a unicode object
        return u"".join(self.render_lines(**render_opts))

    def render_str(self, encoding='ascii', **render_opts):
        """
        Render full tag as an encoded string.

        The unicode rendering is encoded with `encoding`; characters the codec cannot represent raise
        UnicodeEncodeError.

        >>> Tag('xx', 'yy').render_str()
        '<xx>\\n\\tyy\\n</xx>\\n'
        """
        return self.render_unicode(**render_opts).encode(encoding)

    def render_out(self, stream, encoding=None, **render_opts):
        """
        Render output into the given stream, encoding using the given encoding if given.

        Each line is written with a separate stream.write() call; without an encoding the lines are written
        unencoded.

        >>> from StringIO import StringIO; buf = StringIO(); Tag('xx', 'yy').render_out(buf, 'ascii'); buf.getvalue()
        '<xx>\\n\\tyy\\n</xx>\\n'
        """
        for line in self.render_lines(**render_opts):
            if encoding:
                line = line.encode(encoding)

            stream.write(line)

    def render_file(self, file, encoding=None, **render_opts):
        """
        Render output to given file, overwriting anything already there.

        `file` must provide an open_write(encoding) method returning a writable stream.
        """
        # NOTE(review): the encoding is handed to open_write() only, so the returned stream presumably does
        # the encoding itself; the stream is not closed here -- confirm against the file type in use.
        self.render_out(file.open_write(encoding), **render_opts)

    # default output
    # NOTE: these aliases are bound to the functions defined in this class body, so a subclass overriding
    # render_str/render_unicode does not change str()/unicode()/.render() unless it rebinds the aliases too.
    __str__ = render_str
    __unicode__ = render_unicode

    # default .render method
    render = render_unicode
class Container(IRenderable):
    """
    An ordered sequence of renderable items.

    Serves as the superclass of Tag. Its main job is to normalise the contents it is given, so that e.g. a
    generator passed as tag contents is iterated through instead of being repr'd.
    """

    @classmethod
    def process_contents(cls, contents):
        """
        Walk the contents iterable and yield the items that should actually be stored.

        None items are dropped. Tuples, lists and generators are flattened recursively; every other item is
        passed through untouched.

        >>> list(Container.process_contents([]))
        []
        >>> list(Container.process_contents([None]))
        []
        >>> list(Container.process_contents([u'foo']))
        [u'foo']
        """
        # the hardcoded set of sequence types that get expanded in place
        flattened_types = (_types.TupleType, _types.ListType, _types.GeneratorType)

        for item in contents:
            if item is None:
                # omitted entirely
                continue

            if not isinstance(item, flattened_types):
                # plain item: IRenderable or data
                yield item
                continue

            for nested in cls.process_contents(item):
                yield nested

    def __init__(self, *contents):
        """
        Build the container from the given sub-items, stored after process_contents() filtering.
        """
        self.contents = list(self.process_contents(contents))

    def render_raw_lines(self, **render_opts):
        """
        Yield the lines of every item in order, without adding any indentation of our own.

        IRenderable items render their own lines (and receive render_opts); anything else is converted to
        unicode, escaped, and yielded as one line.

        >>> list(Container(5).render_raw_lines())
        [u'5']
        >>> list(Container('line1', 'line2').render_raw_lines())
        [u'line1', u'line2']
        >>> list(Container('a', Tag('b', 'bb'), 'c').render_raw_lines())
        [u'a', u'<b>', u'\\tbb', u'</b>', u'c']
        >>> list(Container(Tag('hr'), Tag('foo')('bar')).render_raw_lines())
        [u'<hr />', u'<foo>', u'\\tbar', u'</foo>']
        """
        for item in self.contents:
            if not isinstance(item, IRenderable):
                # raw data value: escape it
                yield escape(unicode(item))
                continue

            # nested renderable: pass its lines through as-is
            for line in item.render_raw_lines(**render_opts):
                yield line

    def __repr__(self):
        return 'Container(%s)' % ', '.join(map(repr, self.contents))
class Tag(Container):
    """
    A HTML tag, with attributes and contents, which can be a mixture of data and other renderables (tags).

    This is the core object, and the ~only one you really need to pay attention to.

    Provides various kinds of rendering output via IRenderable.

    Attributes whose name starts with an underscore are rendering options rather than HTML attributes; they
    are never written to the output. render_raw_lines() reads two of them:

        _selfclosing            set to False to render an empty tag as <x></x> instead of <x />
        _whitespace_sensitive   set to a true value to render tag and contents on one line, unindented
    """

    @staticmethod
    def process_attrs(attrs):
        """
        Postprocess attributes.

        Key-value pairs where the value is None will be omitted, and any trailing underscores in the key
        removed. Returns a generator of (name, value) pairs.

        TODO: only remove one underscore
              (note that __call__ feeds already-processed attrs through here again, so whatever stripping is
              done has to give the same result when applied twice)

        >>> dict(Tag.process_attrs(dict()))
        {}
        >>> dict(Tag.process_attrs(dict(foo='bar')))
        {'foo': 'bar'}
        >>> dict(Tag.process_attrs(dict(class_='bar', frob=None)))
        {'class': 'bar'}
        """
        return ((k.rstrip('_'), v) for k, v in attrs.iteritems() if v is not None)

    def __init__(self, _name, *contents, **attrs):
        """
        Construct tag with given name/attributes or contents.

        The filtering rules described in process_contents/process_attrs apply.

        >>> Tag('foo')
        Tag('foo')
        >>> Tag('foo', 'quux')
        Tag('foo', 'quux')
        >>> Tag('foo', 'quux', bar=5)
        Tag('foo', 'quux', bar=5)
        >>> Tag('foo', class_='ten')
        Tag('foo', class='ten')
        >>> Tag('bar', name='foo')
        Tag('bar', name='foo')
        """
        # store contents as container
        super(Tag, self).__init__(*contents)

        # store postprocessed stuff
        self.name = _name
        self.attrs = dict(self.process_attrs(attrs))

    def __call__(self, *contents, **attrs):
        """
        Return a new Tag with this tag's attributes and contents, as well as the given attributes/contents.

        This tag itself is left unmodified. Given attributes override existing ones with the same name.

        The filtering rules described in process_contents/process_attrs apply.

        >>> Tag('foo')('bar')
        Tag('foo', 'bar')
        >>> Tag('a', href='index.html')("Home")
        Tag('a', 'Home', href='index.html')
        >>> Tag('bar', None)(5, foo=None, class_='bar')
        Tag('bar', 5, class='bar')
        >>> Tag('a')('b')('c')(asdf=5)
        Tag('a', 'b', 'c', asdf=5)
        >>> t1 = Tag('a'); t2 = t1('b'); t1
        Tag('a')
        """
        # merge attrs/contents; later (given) attrs win over our own
        # XXX: new_attrs is not an iterator...
        new_attrs = dict(_itertools.chain(self.attrs.iteritems(), attrs.iteritems()))
        new_contents = _itertools.chain(self.contents, contents)

        # build new tag
        return Tag(self.name, *new_contents, **new_attrs)

    @staticmethod
    def format_attr(name, value):
        """
        Format a single HTML tag attribute, escaping the value (including double quotes).

        >>> Tag.format_attr('name', 'value')
        u'name="value"'
        >>> Tag.format_attr('this', '<a"b>')
        u'this="&lt;a&quot;b&gt;"'
        >>> Tag.format_attr('xx', 1337)
        u'xx="1337"'
        """
        return u'%s="%s"' % (name, escape(unicode(value), True))

    def render_attrs(self):
        """
        Return the HTML attributes string, without the underscore-prefixed option attributes.

        The attributes come out in dict iteration order.

        >>> Tag('x', foo=5, bar='<').render_attrs()
        u'foo="5" bar="&lt;"'
        """
        return " ".join(self.format_attr(n, v) for n, v in self.attrs.iteritems() if not n.startswith('_'))

    def render_raw_lines(self, indent=u'\t'):
        """
        Render the tag and indented content.

        A tag without contents is rendered as a singleton (<x />), unless the _selfclosing option is False.
        With the _whitespace_sensitive option, the tag and its contents are rendered as one single line.

        >>> list(Tag('xx', 'yy', zz='foo').render_raw_lines(indent=' '))
        [u'<xx zz="foo">', u' yy', u'</xx>']
        >>> list(Tag('script', _selfclosing=False).render_raw_lines())
        [u'<script>', u'</script>']
        >>> list(Tag('pre', 'a', _whitespace_sensitive=True).render_raw_lines())
        [u'<pre>a</pre>']
        """
        # opts
        selfclosing = self.attrs.get('_selfclosing')
        whitespace_sensitive = self.attrs.get('_whitespace_sensitive')

        # render attr string, including preceding space
        # decide on the *rendered* string: a tag carrying only _options has attrs but nothing to output, and
        # must not get a stray space before the closing bracket
        rendered_attrs = self.render_attrs()
        attrs_stuff = (u" " + rendered_attrs) if rendered_attrs else u""

        if self.contents or selfclosing is False:
            if not whitespace_sensitive:
                # wrapping tags
                yield u"<%s%s>" % (self.name, attrs_stuff)

                # subcontents
                for line in super(Tag, self).render_raw_lines(indent=indent):
                    yield indent + line

                yield u"</%s>" % (self.name, )

            else:
                # whole tag
                yield u"<%s%s>%s</%s>" % (
                    self.name, attrs_stuff,
                    ''.join(super(Tag, self).render_raw_lines(indent=indent)),
                    self.name
                )

        else:
            # singleton tag
            yield u"<%s%s />" % (self.name, attrs_stuff)

    def __repr__(self):
        return 'Tag(%s)' % ', '.join(
            [
                repr(self.name)
            ] + [
                repr(c) for c in self.contents
            ] + [
                '%s=%r' % (name, value) for name, value in self.attrs.iteritems()
            ]
        )
class Text(IRenderable):
    """
    A piece of raw HTML text, emitted exactly as given.
    """

    def __init__(self, line):
        """
        Store the given line as the one and only line to render.
        """
        self.lines = [line]

    def render_raw_lines(self, indent=u'\t'):
        """
        Return the stored lines as they are; the indent argument is accepted but not used.
        """
        return self.lines
class Document(IRenderable):
    """
    A full XHTML document with XML header, doctype, head and body.

    XXX: current rendering is a bit of a kludge

    <?xml version="..." encoding="..." ?>
    <!DOCTYPE ...>
    <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
        <head>
            ...
        </head>
        <body>
            ...
        </body>
    </html>
    """

    def __init__(self,
        head, body,
        xml_version='1.0', xml_encoding='utf-8',
        doctype='html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"',
        html_xmlns='http://www.w3.org/1999/xhtml', html_lang='en'
    ):
        """
        Build the document around the given head and body contents.

            head, body      - contents for the <head> and <body> tags
            xml_version     - version given in the <?xml ?> header
            xml_encoding    - encoding given in the <?xml ?> header; render_str/render_out insist on it
            doctype         - text placed inside <!DOCTYPE ...>
            html_xmlns      - value for the xmlns attribute of <html>
            html_lang       - value for the xml:lang attribute of <html>
        """
        # store
        self.xml_version = xml_version
        self.xml_encoding = xml_encoding
        self.doctype = doctype

        # build the document
        # attribute names like xml:lang are not valid keywords, hence the dict expansion
        self.document = Tag('html', **{'xmlns': html_xmlns, 'xml:lang': html_lang})(
            Tag('head', head),
            Tag('body', body),
        )

    def render_raw_lines(self, **render_opts):
        """
        Render the two header lines, and then the document.
        """
        yield '<?xml version="%s" encoding="%s" ?>' % (self.xml_version, self.xml_encoding)
        yield '<!DOCTYPE %s>' % (self.doctype, )

        for line in self.document.render_raw_lines(**render_opts):
            yield line

    def _check_encoding(self, encoding):
        """
        Raise ValueError if an encoding is given and it is not the same string as our xml_encoding.
        """
        if encoding and encoding != self.xml_encoding:
            raise ValueError("encoding mismatch: %r should be %r" % (encoding, self.xml_encoding))

    def render_str(self, encoding=None, **render_opts):
        """
        Wrap render_str to verify that the right encoding is used.

        The output is always encoded using xml_encoding, so that it matches the XML header; passing any other
        encoding raises ValueError.
        """
        self._check_encoding(encoding)

        # super() must name this class
        return super(Document, self).render_str(self.xml_encoding, **render_opts)

    def render_out(self, stream, encoding=None, **render_opts):
        """
        Wrap render_out to verify that the right encoding is used.

        The output is always encoded using xml_encoding, so that it matches the XML header; passing any other
        encoding raises ValueError.
        """
        self._check_encoding(encoding)

        # super() must name this class
        return super(Document, self).render_out(stream, self.xml_encoding, **render_opts)
class TagFactory(object):
    """
    Build Tags with names given as attribute names.

    Any attribute that is not defined on the factory yields an empty Tag of that name; `document` and `raw`
    give access to the Document and Text classes.

    >>> str(TagFactory().raw("><"))
    '><\\n'
    """

    # full XHTML document
    document = Document

    # raw HTML
    raw = Text

    def __getattr__(self, name):
        """
        Get a Tag object with the given name, but no contents.

        Double-underscore names (__x__) are refused with AttributeError: they are never tag names, and code
        probing for optional protocol methods via getattr(obj, '__deepcopy__', None) and the like must see
        them as missing instead of being handed a Tag.

        >>> TagFactory().a(href='bar')('quux')
        Tag('a', 'quux', href='bar')
        """
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)

        return Tag(name)
# short lowercase aliases for the public classes
container, tag = Container, Tag
raw, document = Text, Document

# shared factory instance: attribute access on it yields a Tag of that name
tags = TagFactory()
# testing: run the doctests embedded in this module when it is executed as a script
if __name__ == '__main__':
    import doctest

    doctest.testmod()