"""
Generate XHTML output from python code.
>>> from html import tags
>>> unicode(tags.a(href="http://www.google.com")("Google <this>!"))
u'<a href="http://www.google.com">\\n\\tGoogle &lt;this&gt;!\\n</a>'
"""
import itertools as itertools
import types as types
from xml.sax import saxutils
class Renderable(object):
    """
    Base class for structured data that renders as indented lines of text.

    Subclasses implement flatten(); the remaining methods build on it.
    """

    def flatten(self):
        """
        Produce this object as a series of (indent_level, line) tuples.

        Must be overridden; the base implementation raises NotImplementedError.
        """

        raise NotImplementedError()

    def iter(self, indent='\t'):
        """
        Yield each flattened line, prefixed with `indent` repeated once per indent level.
        """

        for depth, text in self.flatten():
            prefix = indent * depth

            yield prefix + text

    def unicode(self, newline=u'\n', **opts):
        """
        Render as a single unicode string, with lines joined by `newline`.

        Any further keyword options are passed on to iter().
        No newline is returned at the end of the string.

        >>> Tag.build('a', 'b').unicode(newline='X', indent='Y')
        u'<a>XYbX</a>'
        """

        rendered_lines = self.iter(**opts)

        return newline.join(rendered_lines)

    # required for print
    def str(self, newline='\n', encoding='ascii', **opts):
        """
        Render as a single byte string, encoding every line using `encoding`.

        Any further keyword options are passed on to iter().
        """

        # XXX: try and render as non-unicode, i.e. binary data in the tree?
        encoded_lines = [text.encode(encoding) for text in self.iter(**opts)]

        return newline.join(encoded_lines)

    # formal interface using defaults
    __iter__ = iter
    __unicode__ = unicode
    __str__ = str
class Text(Renderable):
    """
    Raw HTML text that is output as-is, without any structure or further processing.

    >>> Text('foo')
    Text(u'foo')
    >>> list(Text('<foo>'))
    [u'<foo>']
    >>> list(tag('a', Text('<foo>')))
    [u'<a>', u'\\t<foo>', u'</a>']
    """

    def __init__(self, text):
        # always held as unicode
        as_unicode = unicode(text)

        self.text = as_unicode

    def flatten(self):
        # a single line at the base indent level
        line = self.text

        yield 0, line

    def __repr__(self):
        return "Text(%r)" % (self.text, )
class Tag(Renderable):
    """
    An immutable HTML tag structure, with the tag's name, attributes and contents.

    Use the build() factory to create Tags from Python call arguments, and call an existing Tag to get a
    copy with additional contents/attributes.
    """

    # types of nested items to flatten
    CONTAINER_TYPES = (types.TupleType, types.ListType, types.GeneratorType)

    # types of items to keep as-is
    ITEM_TYPES = (Renderable, )

    @classmethod
    def process_contents(cls, *args):
        """
        Yield the HTML tag's contents from the given sequence of positional arguments as a series of flattened
        items, eagerly converting them to unicode.

        If no arguments are given, we don't have any children:

        >>> bool(list(Tag.process_contents()))
        False

        Items that are None will be ignored:

        >>> list(Tag.process_contents(None))
        []

        Various Python container types are recursively flattened:

        >>> list(Tag.process_contents([1, 2]))
        [u'1', u'2']
        >>> list(Tag.process_contents([1], [2]))
        [u'1', u'2']
        >>> list(Tag.process_contents([1, [2]]))
        [u'1', u'2']
        >>> list(Tag.process_contents(n + 1 for n in xrange(2)))
        [u'1', u'2']
        >>> list(Tag.process_contents((1, 2)))
        [u'1', u'2']
        >>> list(Tag.process_contents((1), (2, )))
        [u'1', u'2']

        Our own HTML-aware objects are returned as-is:

        >>> list(Tag.process_contents(Tag.build('foo')))
        [tag('foo')]
        >>> list(Tag.process_contents(Text('bar')))
        [Text(u'bar')]

        All other objects are converted to unicode:

        >>> list(Tag.process_contents('foo', u'bar', 0.123, False))
        [u'foo', u'bar', u'0.123', u'False']
        """

        for arg in args:
            if arg is None:
                # skip null: None
                continue

            elif isinstance(arg, cls.CONTAINER_TYPES):
                # flatten nested container: tuple/list/generator
                for node in arg:
                    # recurse
                    for item in cls.process_contents(node):
                        yield item

            elif isinstance(arg, cls.ITEM_TYPES):
                # yield item: Renderable
                yield arg

            else:
                # as unicode
                yield unicode(arg)

    @classmethod
    def process_attrs(cls, **kwargs):
        """
        Yield the HTML tag attributes from the given set of keyword arguments as a series of (name, value) tuples.

        Keyword-only options (`_key=value`) are filtered out:

        >>> dict(Tag.process_attrs(_opt=True))
        {}

        Attributes with a value of None/False are filtered out:

        >>> dict(Tag.process_attrs(foo=None, bar=False))
        {}

        A value given as True is returned as the key's value:

        >>> dict(Tag.process_attrs(quux=True))
        {'quux': u'quux'}

        A (single) trailing underscore in the attribute name is removed:

        >>> dict(Tag.process_attrs(class_='foo'))
        {'class': u'foo'}
        >>> dict(Tag.process_attrs(data__='foo'))
        {'data_': u'foo'}
        """

        for key, value in kwargs.iteritems():
            # keyword arguments are always pure strings
            assert type(key) is str

            if value is None or value is False:
                # omit
                continue

            if key.startswith('_'):
                # option
                continue

            if key.endswith('_'):
                # strip underscore
                key = key[:-1]

            if value is True:
                # flag attr
                value = key

            yield key, unicode(value)

    @classmethod
    def process_opts(cls, **kwargs):
        """
        Return the keyword-only _options, extracted from the given dict of keyword arguments, as a tuple of
        (k, v) tuples.

        Leading underscores are stripped from the option names; keyword arguments without a leading underscore
        are ignored.

        >>> Tag.process_opts(foo='bar', _bar=False)
        (('bar', False),)
        """

        return tuple((k.lstrip('_'), v) for k, v in kwargs.iteritems() if k.startswith('_'))

    @classmethod
    def build(cls, _name, *args, **kwargs):
        """
        Factory function for constructing Tags by directly passing in contents/attributes/options as Python function
        arguments/keyword arguments.

        The first positional argument is the tag's name:

        >>> Tag.build('foo')
        tag('foo')

        Further positional arguments are the tag's contents:

        >>> Tag.build('foo', 'quux', 'bar')
        tag('foo', u'quux', u'bar')

        All the rules used by process_contents() are available:

        >>> Tag.build('foo', [1, None], None, (n for n in xrange(2)))
        tag('foo', u'1', u'0', u'1')

        The special-case for a genexp as the only argument works:

        >>> f = lambda *args: Tag.build('foo', *args)
        >>> f('hi' for n in xrange(2))
        tag('foo', u'hi', u'hi')

        Attributes are passed as keyword arguments, with or without contents:

        >>> Tag.build('foo', id=1)
        tag('foo', id=u'1')
        >>> Tag.build('foo', 'quux', bar=5)
        tag('foo', u'quux', bar=u'5')
        >>> Tag.build('foo', class_='ten')
        tag('foo', class=u'ten')

        The attribute names don't conflict with positional argument names:

        >>> Tag.build('bar', name='foo')
        tag('bar', name=u'foo')

        Options are handled as the 'real' keyword arguments:

        >>> print Tag.build('foo', _selfclosing=False)
        <foo></foo>
        >>> print Tag.build('foo', _foo='bar')
        Traceback (most recent call last):
            ...
        TypeError: __init__() got an unexpected keyword argument 'foo'
        """

        # pre-process incoming user values
        contents = list(cls.process_contents(*args))
        attrs = dict(cls.process_attrs(**kwargs))

        # XXX: use Python 2.6 keyword-only arguments instead?
        options = dict(cls.process_opts(**kwargs))

        return cls(_name, contents, attrs, **options)

    def __init__(self, name, contents, attrs, selfclosing=None, whitespace_sensitive=None):
        """
        Initialize internal Tag state with the given tag identifier, flattened series of content items, mapping
        of attributes and options.

            selfclosing             - set to False to render empty tags as <foo></foo> instead of <foo />
                                      (for XHTML -> HTML compatibility)
            whitespace_sensitive    - do not indent tag content onto separate rows, render the full tag as a
                                      single row

        Use the build() factory function to build Tag objects using Python's function call argument semantics.

        The tag name is used as a pure string identifier:

        >>> Tag(u'foo', [], {})
        tag('foo')
        >>> Tag(u'\\xE4', [], {})
        Traceback (most recent call last):
            ...
        UnicodeEncodeError: 'ascii' codec can't encode character u'\\xe4' in position 0: ordinal not in range(128)

        Contents have their order preserved:

        >>> Tag('foo', [1, 2], {})
        tag('foo', 1, 2)
        >>> Tag('foo', [2, 1], {})
        tag('foo', 2, 1)

        Contents and attributes are copied, so any iterable of items works and later changes to the caller's
        objects do not affect the Tag:

        >>> Tag('foo', (1, 2), {})('bar')
        tag('foo', 1, 2, u'bar')
        >>> items = [1]; t = Tag('foo', items, {}); items.append(2); t
        tag('foo', 1)

        Attributes can be given:

        >>> Tag('foo', [], dict(foo='bar'))
        tag('foo', foo='bar')

        Options can be given:

        >>> print Tag('foo', [], {}, selfclosing=False)
        <foo></foo>
        """

        self.name = str(name)

        # private copies: keeps the Tag independent of the caller's objects, and guarantees that __call__ can
        # extend the contents using list concatenation
        self.contents = list(contents)
        self.attrs = dict(attrs)

        # options
        self.selfclosing = selfclosing
        self.whitespace_sensitive = whitespace_sensitive

    def __call__(self, *args, **kwargs):
        """
        Return a new Tag as a copy of this tag, but with the given additional attributes/contents.

        The same rules for function positional/keyword arguments apply as for build()

        >>> Tag.build('foo')('bar')
        tag('foo', u'bar')
        >>> Tag.build('a', href='index.html')("Home")
        tag('a', u'Home', href=u'index.html')

        New contents and attributes can be given freely, using the same rules as for Tag.build:

        >>> Tag.build('bar', None)(5, foo=None, class_='bar')
        tag('bar', u'5', class=u'bar')

        Tag contents accumulate in order:

        >>> Tag.build('a')('b', ['c'])('d')
        tag('a', u'b', u'c', u'd')

        Each Tag is immutable, so the called Tag isn't changed, but rather a copy is returned:

        >>> t1 = Tag.build('a'); t2 = t1('b'); t1
        tag('a')

        Attribute values are replaced:

        >>> Tag.build('foo', a=2)(a=3)
        tag('foo', a=u'3')

        Options are also supported:

        >>> list(Tag.build('foo')(bar='quux', _selfclosing=False))
        [u'<foo bar="quux"></foo>']
        """

        # accumulate contents
        contents = self.contents + list(self.process_contents(*args))

        # merge attrs
        attrs = dict(self.attrs)
        attrs.update(self.process_attrs(**kwargs))

        # options
        opts = dict(
            selfclosing = self.selfclosing,
            whitespace_sensitive = self.whitespace_sensitive,
        )
        opts.update(self.process_opts(**kwargs))

        # build updated tag, using our own class (as build() does) so that subclasses are preserved
        return type(self)(self.name, contents, attrs, **opts)

    def render_attrs(self):
        """
        Return the HTML attributes string, with each value quoted and escaped for use as an attribute value.

        >>> Tag.build('x', foo=5, bar='<', quux=None).render_attrs()
        u'foo="5" bar="&lt;"'
        >>> Tag.build('x', foo='a"b').render_attrs()
        u'foo=\\'a"b\\''
        """

        return " ".join(
            (
                u'%s=%s' % (name, saxutils.quoteattr(value))
            ) for name, value in self.attrs.iteritems()
        )

    def flatten_items(self, indent=1):
        """
        Flatten our content into a series of indented lines.

        >>> list(Tag.build('tag', 5).flatten_items())
        [(1, u'5')]
        >>> list(Tag.build('tag', 'line1', 'line2').flatten_items())
        [(1, u'line1'), (1, u'line2')]

        Nested :

        >>> list(Tag.build('tag', 'a', Tag.build('b', 'bb'), 'c').flatten_items())
        [(1, u'a'), (1, u'<b>'), (2, u'bb'), (1, u'</b>'), (1, u'c')]
        >>> list(Tag.build('tag', Tag.build('hr'), Tag.build('foo')('bar')).flatten_items())
        [(1, u'<hr />'), (1, u'<foo>'), (2, u'bar'), (1, u'</foo>')]
        """

        for item in self.contents:
            if isinstance(item, Renderable):
                # recursively flatten items
                for line_indent, line in item.flatten():
                    # indented
                    yield indent + line_indent, line

            else:
                # render HTML-escaped raw value
                yield indent, saxutils.escape(item)

    def flatten(self):
        """
        Render the tag and all content as a flattened series of indented lines.

        Empty tags collapse per default:

        >>> list(Tag.build('foo').flatten())
        [(0, u'<foo />')]
        >>> list(Tag.build('bar', id=5).flatten())
        [(0, u'<bar id="5" />')]

        Values are indented inside the start tag:

        >>> list(Tag.build('foo', 'bar', a=5).flatten())
        [(0, u'<foo a="5">'), (1, u'bar'), (0, u'</foo>')]

        Nested tags are further indented:

        >>> list(Tag.build('1', '1.1', Tag.build('1.2', '1.2.1'), '1.3', a=5).flatten())
        [(0, u'<1 a="5">'), (1, u'1.1'), (1, u'<1.2>'), (2, u'1.2.1'), (1, u'</1.2>'), (1, u'1.3'), (0, u'</1>')]

        Empty tags are rendered with a separate closing tag on the same line, if desired:

        >>> list(Tag.build('foo', _selfclosing=False).flatten())
        [(0, u'<foo></foo>')]
        >>> list(Tag.build('foo', src='asdf', _selfclosing=False).flatten())
        [(0, u'<foo src="asdf"></foo>')]

        Tags that are declared as whitespace-sensitive are collapsed onto the same line:

        >>> list(Tag.build('foo', _whitespace_sensitive=True).flatten())
        [(0, u'<foo />')]
        >>> list(Tag.build('foo', _whitespace_sensitive=True, _selfclosing=False).flatten())
        [(0, u'<foo></foo>')]
        >>> list(Tag.build('foo', 'bar', _whitespace_sensitive=True).flatten())
        [(0, u'<foo>bar</foo>')]
        >>> list(Tag.build('foo', 'bar\\nasdf\\tx', _whitespace_sensitive=True).flatten())
        [(0, u'<foo>bar\\nasdf\\tx</foo>')]
        >>> list(Tag.build('foo', 'bar', Tag.build('quux', 'asdf'), 'asdf', _whitespace_sensitive=True).flatten())
        [(0, u'<foo>bar<quux>asdf</quux>asdf</foo>')]

        Embedded HTML given as string values is escaped:

        >>> list(Tag.build('foo', '<asdf>'))
        [u'<foo>', u'\\t&lt;asdf&gt;', u'</foo>']

        Embedded quotes in attribute values are escaped:

        >>> list(Tag.build('foo', style='ok;" onload="...'))
        [u'<foo style=\\'ok;" onload="...\\' />']
        >>> list(Tag.build('foo', style='ok;\\'" onload=..."\\''))
        [u'<foo style="ok;\\'&quot; onload=...&quot;\\'" />']
        """

        # optional attr spec
        if self.attrs:
            attrs = " " + self.render_attrs()

        else:
            attrs = ""

        if not self.contents and self.selfclosing is False:
            # empty tag, but don't use the self-closing syntax..
            yield 0, u"<%s%s></%s>" % (self.name, attrs, self.name)

        elif not self.contents:
            # self-closing xml tag
            # do note that this is invalid HTML, and the space before the / is relevant for parsing it as HTML
            yield 0, u"<%s%s />" % (self.name, attrs)

        elif self.whitespace_sensitive:
            # join together each line for each child, discarding the indent
            content = u''.join(line for indent, line in self.flatten_items())

            # render full tag on a single line
            yield 0, u"<%s%s>%s</%s>" % (self.name, attrs, content, self.name)

        else:
            # start tag
            yield 0, u"<%s%s>" % (self.name, attrs)

            # contents, indented one level below the start tag
            for indent, line in self.flatten_items(indent=1):
                yield indent, line

            # close tag
            yield 0, u"</%s>" % (self.name, )

    def __repr__(self):
        # mirrors the build() call syntax: name, then contents, then attributes
        return 'tag(%s)' % ', '.join(
            [
                repr(self.name)
            ] + [
                repr(c) for c in self.contents
            ] + [
                '%s=%r' % (name, value) for name, value in self.attrs.iteritems()
            ]
        )
# factory function for Tag: tag(name, *contents, **attrs) is shorthand for Tag.build(...)
tag = Tag.build
class Document(Renderable):
    """
    A full XHTML document (XHTML 1.1 doctype per default) with optional XML declaration, doctype and
    html[@xmlns].

    >>> list(Document(tags.html('...')))
    [u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', u'<html xmlns="http://www.w3.org/1999/xhtml">', u'\\t...', u'</html>']

    The XML declaration is rendered when a version and/or encoding is given, always with the version first:

    >>> list(Document(tags.html('...'), doctype=None, xml_version='1.0', xml_encoding='utf-8'))
    [u'<?xml version="1.0" encoding="utf-8" ?>', u'<html xmlns="http://www.w3.org/1999/xhtml">', u'\\t...', u'</html>']
    """

    # order in which the XML declaration's pseudo-attributes are rendered; the XML grammar requires the
    # version to come first, which plain dict iteration does not guarantee
    XML_DECLARATION_ORDER = ('version', 'encoding', 'standalone')

    def __init__(self, root,
        doctype='html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"',
        html_xmlns='http://www.w3.org/1999/xhtml',
        xml_version=None, xml_encoding=None,
    ):
        """
            root            - the root <html> tag; it is called with xmlns=html_xmlns to get the tag rendered
            doctype         - contents of the <!DOCTYPE ...> line, or a false value to omit it
            html_xmlns      - value for the root tag's xmlns attribute
            xml_version     - version for the <?xml ... ?> declaration
            xml_encoding    - encoding for the <?xml ... ?> declaration; implies version "1.0" if no
                              xml_version is given

        The XML declaration is omitted if neither xml_version nor xml_encoding is given.
        """

        # add xmlns attr to root node
        self.root = root(xmlns=html_xmlns)

        # store
        self.doctype = doctype

        if xml_encoding and not xml_version:
            # a declaration without a version is not well-formed
            xml_version = '1.0'

        self.xml_declaration = {}

        if xml_version:
            self.xml_declaration['version'] = xml_version

        if xml_encoding:
            self.xml_declaration['encoding'] = xml_encoding

    def flatten(self):
        """
        Return the header lines along with the normally formatted <html> tag
        """

        if self.xml_declaration:
            # well-known keys in their required order, followed by anything else in a stable order
            keys = [key for key in self.XML_DECLARATION_ORDER if key in self.xml_declaration]
            keys += sorted(key for key in self.xml_declaration if key not in self.XML_DECLARATION_ORDER)

            yield 0, u'<?xml %s ?>' % (' '.join('%s="%s"' % (key, self.xml_declaration[key]) for key in keys))

        if self.doctype:
            yield 0, u'<!DOCTYPE %s>' % (self.doctype)

        # <html>
        for indent, line in self.root.flatten():
            yield indent, line
class TagFactory(object):
    """
    Build Tags with names given as attribute names

    >>> list(TagFactory().a(href='#')('Yay'))
    [u'<a href="#">', u'\\tYay', u'</a>']

    >>> list(TagFactory().raw("><"))
    [u'><']

    Special (double-underscore) names are not turned into tags:

    >>> hasattr(TagFactory(), '__wrapped__')
    False
    """

    # full XHTML document
    document = Document

    # raw HTML
    raw = Text

    def __getattr__(self, name):
        """
        Get a Tag object with the given name, but no contents

        >>> TagFactory().a
        tag('a')

        Raises AttributeError for __dunder__ names, so that protocol lookups using getattr()/hasattr()
        see them as missing instead of receiving a Tag.
        """

        if name.startswith('__') and name.endswith('__'):
            # special method/attribute lookup, not a tag name
            raise AttributeError(name)

        return Tag(name, [], {})
# static instance: shared TagFactory, so that e.g. tags.a gives an empty <a> Tag
tags = TagFactory()
# testing: run the doctests embedded in this module's docstrings when executed as a script
if __name__ == '__main__' :
    import doctest

    doctest.testmod()