diff -r 5100b359906c -r d45fc43c6073 pvl/web/html.py --- a/pvl/web/html.py Tue Feb 24 12:47:09 2015 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,641 +0,0 @@ -""" - Generate XHTML output from python code. - - >>> from html import tags - >>> unicode(tags.a(href="http://www.google.com")("Google !")) - u'\\n\\tGoogle <this>!\\n' -""" - -# XXX: needs some refactoring for Text vs Tag now -# XXX: not all tags work in self-closing form, e.g. empty html.title() breaks badly - -import itertools as itertools -import types as types -from xml.sax import saxutils - -class Renderable (object) : - """ - Structured data that's flattened into indented lines of text. - """ - - # types of nested items to flatten - CONTAINER_TYPES = (types.TupleType, types.ListType, types.GeneratorType) - - @classmethod - def process_contents (cls, *args) : - """ - Yield the HTML tag's contents from the given sequence of positional arguments as a series of flattened - items, eagerly converting them to unicode. - - If no arguments are given, we don't have any children: - - >>> bool(list(Tag.process_contents())) - False - - Items that are None will be ignored: - - >>> list(Tag.process_contents(None)) - [] - - Various Python container types are recursively flattened: - - >>> list(Tag.process_contents([1, 2])) - [u'1', u'2'] - >>> list(Tag.process_contents([1], [2])) - [u'1', u'2'] - >>> list(Tag.process_contents([1, [2]])) - [u'1', u'2'] - >>> list(Tag.process_contents(n + 1 for n in xrange(2))) - [u'1', u'2'] - >>> list(Tag.process_contents((1, 2))) - [u'1', u'2'] - >>> list(Tag.process_contents((1), (2, ))) - [u'1', u'2'] - - Our own HTML-aware objects are returned as-is: - - >>> list(Tag.process_contents(Tag.build('foo'))) - [tag('foo')] - >>> list(Tag.process_contents(Text(u'bar'))) - [Text(u'bar')] - - All other objects are converted to unicode: - - >>> list(Tag.process_contents('foo', u'bar', 0.123, False)) - [u'foo', u'bar', u'0.123', u'False'] - - """ - - for arg in args : - if arg is None : - # skip null: None - continue - - elif isinstance(arg, cls.CONTAINER_TYPES) : - # flatten nested container: tuple/list/generator - for node in arg : - # recurse - for item in cls.process_contents(node) : - yield item - - elif isinstance(arg, Renderable) : - # yield item: Renderable - yield arg - - else : - # as unicode - yield unicode(arg) - - - def flatten (self) : - """ - Flatten this object into a series of (identlevel, line) tuples. - """ - - raise NotImplementedError() - - def iter (self, indent='\t') : - """ - Yield a series of lines for this render. - """ - - for indent_level, line in self.flatten() : - yield (indent * indent_level) + line - - def unicode (self, newline=u'\n', **opts) : - """ - Render as a single unicode string. - - No newline is returned at the end of the string. - - >>> Tag.build('a', 'b').unicode(newline='X', indent='Y') - u'XYbX' - """ - - return newline.join(self.iter(**opts)) - - # required for print - def str (self, newline='\n', encoding='ascii', **opts) : - """ - Render as a single string. - """ - - # XXX: try and render as non-unicode, i.e. binary data in the tree? - return newline.join(line.encode(encoding) for line in self.iter(**opts)) - - # formal interface using defaults - __iter__ = iter - __unicode__ = unicode - __str__ = str - -class Text (Renderable) : - """ - Plain un-structured/un-processed HTML text for output - - >>> Text(u'foo') - Text(u'foo') - >>> list(Text('')) - [u''] - >>> list(Text('', tag('p', 'test'))) - [u'', u'

', u'\\ttest', u'

'] - >>> list(tag('a', Text(''))) - [u'', u'\\t', u''] - >>> list(Text(range(2))) - [u'0', u'1'] - - """ - - def __init__ (self, *contents) : - self.contents = self.process_contents(*contents) - - def flatten (self, indent=0) : - for item in self.contents : - if isinstance(item, Renderable) : - # recursively flatten items - for line_indent, line in item.flatten() : - # indented - yield indent + line_indent, line - - else : - # render raw value - yield indent, unicode(item) - - def __repr__ (self) : - return "Text(%s)" % (', '.join(repr(item) for item in self.contents)) - -class Tag (Renderable) : - """ - An immutable HTML tag structure, with the tag's name, attributes and contents. - """ - - @classmethod - def process_attrs (cls, **kwargs) : - """ - Yield the HTML tag attributes from the given set of keyword arguments as a series of (name, value) tuples. - - Keyword-only options (`_key=value`) are filtered out: - - >>> dict(Tag.process_attrs(_opt=True)) - {} - - Attributes with a value of None/False are filtered out: - - >>> dict(Tag.process_attrs(foo=None, bar=False)) - {} - - A value given as True is returned as the key's value: - - >>> dict(Tag.process_attrs(quux=True)) - {'quux': u'quux'} - - A (single) trailing underscore in the attribute name is removed: - - >>> dict(Tag.process_attrs(class_='foo')) - {'class': u'foo'} - >>> dict(Tag.process_attrs(data__='foo')) - {'data_': u'foo'} - """ - - for key, value in kwargs.iteritems() : - # keyword arguments are always pure strings - assert type(key) is str - - if value is None or value is False: - # omit - continue - - if key.startswith('_') : - # option - continue - - if key.endswith('_') : - # strip underscore - key = key[:-1] - - if '_' in key : - key = key.replace('_', '-') - - if value is True : - # flag attr - value = key - - yield key, unicode(value) - - @classmethod - def process_opts (cls, **kwargs) : - """ - Return a series of of the keyword-only _options, extracted from the given dict of keyword arguments, as - (k, v) tuples. - - >>> Tag.process_opts(foo='bar', _bar=False) - (('bar', False),) - """ - - return tuple((k.lstrip('_'), v) for k, v in kwargs.iteritems() if k.startswith('_')) - - @classmethod - def build (cls, _name, *args, **kwargs) : - """ - Factory function for constructing Tags by directly passing in contents/attributes/options as Python function - arguments/keyword arguments. - - The first positional argument is the tag's name: - - >>> Tag.build('foo') - tag('foo') - - Further positional arguments are the tag's contents: - - >>> Tag.build('foo', 'quux', 'bar') - tag('foo', u'quux', u'bar') - - All the rules used by process_contents() are available: - - >>> Tag.build('foo', [1, None], None, (n for n in xrange(2))) - tag('foo', u'1', u'0', u'1') - - The special-case for a genexp as the only argument works: - - >>> f = lambda *args: Tag.build('foo', *args) - >>> f('hi' for n in xrange(2)) - tag('foo', u'hi', u'hi') - - Attributes are passed as keyword arguments, with or without contents: - - >>> Tag.build('foo', id=1) - tag('foo', id=u'1') - >>> Tag.build('foo', 'quux', bar=5) - tag('foo', u'quux', bar=u'5') - >>> Tag.build('foo', class_='ten') - tag('foo', class=u'ten') - - The attribute names don't conflict with positional argument names: - - >>> Tag.build('bar', name='foo') - tag('bar', name=u'foo') - - Options are handled as the 'real' keyword arguments: - - >>> print Tag.build('foo', _selfclosing=False) - - >>> print Tag.build('foo', _foo='bar') - Traceback (most recent call last): - ... - TypeError: __init__() got an unexpected keyword argument 'foo' - """ - - # pre-process incoming user values - contents = list(cls.process_contents(*args)) - attrs = dict(cls.process_attrs(**kwargs)) - - # XXX: use Python 2.6 keyword-only arguments instead? - options = dict(cls.process_opts(**kwargs)) - - return cls(_name, contents, attrs, **options) - - def __init__ (self, name, contents=None, attrs=None, selfclosing=None, whitespace_sensitive=None, escape=True) : - """ - Initialize internal Tag state with the given tag identifier, flattened list of content items, dict of - attributes and dict of options. - - selfclosing - set to False to render empty tags as instead of - (for XHTML -> HTML compatibility) - - whitespace_sensitive - do not indent tag content onto separate rows, render the full tag as a single - row - - escape - html-escape non-Renderable's (text) - - Use the build() factory function to build Tag objects using Python's function call argument semantics. - - The tag name is used a pure string identifier: - - >>> Tag(u'foo', [], {}) - tag('foo') - >>> Tag(u'\\xE4', [], {}) - Traceback (most recent call last): - ... - UnicodeEncodeError: 'ascii' codec can't encode character u'\\xe4' in position 0: ordinal not in range(128) - - Contents have their order preserved: - - >>> Tag('foo', [1, 2], {}) - tag('foo', 1, 2) - >>> Tag('foo', [2, 1], {}) - tag('foo', 2, 1) - - Attributes can be given: - - >>> Tag('foo', [], dict(foo='bar')) - tag('foo', foo='bar') - - Options can be given: - - >>> print Tag('foo', [], {}, selfclosing=False) - - """ - - self.name = str(name) - self.contents = contents or [] - self.attrs = attrs or {} - - # options - self.selfclosing = selfclosing - self.whitespace_sensitive = whitespace_sensitive - self.escape = escape - - def __call__ (self, *args, **kwargs) : - """ - Return a new Tag as a copy of this tag, but with the given additional attributes/contents. - - The same rules for function positional/keyword arguments apply as for build() - - >>> Tag.build('foo')('bar') - tag('foo', u'bar') - >>> Tag.build('a', href='index.html')("Home") - tag('a', u'Home', href=u'index.html') - - New contents and attributes can be given freely, using the same rules as for Tag.build: - - >>> Tag.build('bar', None)(5, foo=None, class_='bar') - tag('bar', u'5', class=u'bar') - - Tag contents accumulate in order: - - >>> Tag.build('a')('b', ['c'])('d') - tag('a', u'b', u'c', u'd') - - Each Tag is immutable, so the called Tag isn't changed, but rather a copy is returned: - - >>> t1 = Tag.build('a'); t2 = t1('b'); t1 - tag('a') - - Attribute values are replaced: - - >>> Tag.build('foo', a=2)(a=3) - tag('foo', a=u'3') - - Options are also supported: - - >>> list(Tag.build('foo')(bar='quux', _selfclosing=False)) - [u''] - """ - - # accumulate contents - contents = self.contents + list(self.process_contents(*args)) - - # merge attrs - attrs = dict(self.attrs) - attrs.update(self.process_attrs(**kwargs)) - - # options - opts = dict( - selfclosing = self.selfclosing, - whitespace_sensitive = self.whitespace_sensitive, - ) - opts.update(self.process_opts(**kwargs)) - - # build updated tag - return Tag(self.name, contents, attrs, **opts) - - def render_attrs (self) : - """ - Return the HTML attributes string - - >>> Tag.build('x', foo=5, bar='<', quux=None).render_attrs() - u'foo="5" bar="<"' - >>> Tag.build('x', foo='a"b').render_attrs() - u'foo=\\'a"b\\'' - """ - - return " ".join( - ( - u'%s=%s' % (name, saxutils.quoteattr(value)) - ) for name, value in self.attrs.iteritems() - ) - - def flatten_items (self, indent=1) : - """ - Flatten our content into a series of indented lines. - - >>> list(Tag.build('tag', 5).flatten_items()) - [(1, u'5')] - >>> list(Tag.build('tag', 'line1', 'line2').flatten_items()) - [(1, u'line1'), (1, u'line2')] - - Nested : - >>> list(Tag.build('tag', 'a', Tag.build('b', 'bb'), 'c').flatten_items()) - [(1, u'a'), (1, u''), (2, u'bb'), (1, u''), (1, u'c')] - >>> list(Tag.build('tag', Tag.build('hr'), Tag.build('foo')('bar')).flatten_items()) - [(1, u'
'), (1, u''), (2, u'bar'), (1, u'')] - """ - - for item in self.contents : - if isinstance(item, Renderable) : - # recursively flatten items - for line_indent, line in item.flatten() : - # indented - yield indent + line_indent, line - - elif self.escape : - # render HTML-escaped raw value - # escape raw values - yield indent, saxutils.escape(item) - - else : - # render raw value - yield indent, unicode(item) - - def flatten (self) : - """ - Render the tag and all content as a flattened series of indented lines. - - Empty tags collapse per default: - - >>> list(Tag.build('foo').flatten()) - [(0, u'')] - >>> list(Tag.build('bar', id=5).flatten()) - [(0, u'')] - - Values are indented inside the start tag: - - >>> list(Tag.build('foo', 'bar', a=5).flatten()) - [(0, u''), (1, u'bar'), (0, u'')] - - Nested tags are further indented: - - >>> list(Tag.build('1', '1.1', Tag.build('1.2', '1.2.1'), '1.3', a=5).flatten()) - [(0, u'<1 a="5">'), (1, u'1.1'), (1, u'<1.2>'), (2, u'1.2.1'), (1, u''), (1, u'1.3'), (0, u'')] - - Empty tags are rendered with a separate closing tag on the same line, if desired: - - >>> list(Tag.build('foo', _selfclosing=False).flatten()) - [(0, u'')] - >>> list(Tag.build('foo', src='asdf', _selfclosing=False).flatten()) - [(0, u'')] - - Tags that are declared as whitespace-sensitive are collapsed onto the same line: - - >>> list(Tag.build('foo', _whitespace_sensitive=True).flatten()) - [(0, u'')] - >>> list(Tag.build('foo', _whitespace_sensitive=True, _selfclosing=False).flatten()) - [(0, u'')] - >>> list(Tag.build('foo', 'bar', _whitespace_sensitive=True).flatten()) - [(0, u'bar')] - >>> list(Tag.build('foo', 'bar\\nasdf\\tx', _whitespace_sensitive=True).flatten()) - [(0, u'bar\\nasdf\\tx')] - >>> list(Tag.build('foo', 'bar', Tag.build('quux', 'asdf'), 'asdf', _whitespace_sensitive=True).flatten()) - [(0, u'barasdfasdf')] - - Embedded HTML given as string values is escaped: - - >>> list(Tag.build('foo', '')) - [u'', u'\\t<asdf>', u''] - - Embedded quotes in attribute values are esacaped: - - >>> list(Tag.build('foo', style='ok;" onload="...')) - [u''] - """ - - # optional attr spec - if self.attrs : - attrs = " " + self.render_attrs() - - else : - attrs = "" - - if not self.contents and self.selfclosing is False : - # empty tag, but don't use the self-closing syntax.. - yield 0, u"<%s%s>" % (self.name, attrs, self.name) - - elif not self.contents : - # self-closing xml tag - # do note that this is invalid HTML, and the space before the / is relevant for parsing it as HTML - yield 0, u"<%s%s />" % (self.name, attrs) - - elif self.whitespace_sensitive : - # join together each line for each child, discarding the indent - content = u''.join(line for indent, line in self.flatten_items()) - - # render full tag on a single line - yield 0, u"<%s%s>%s" % (self.name, attrs, content, self.name) - - else : - # start tag - yield 0, u"<%s%s>" % (self.name, attrs) - - # contents, indented one level below the start tag - for indent, line in self.flatten_items(indent=1) : - yield indent, line - - # close tag - yield 0, u"" % (self.name, ) - - def __repr__ (self) : - return 'tag(%s)' % ', '.join( - [ - repr(self.name) - ] + [ - repr(c) for c in self.contents - ] + [ - '%s=%r' % (name, value) for name, value in self.attrs.iteritems() - ] - ) - -# factory function for Tag -tag = Tag.build - - -class Document (Renderable) : - """ - A full XHTML 1.0 document with optional XML header, doctype, html[@xmlns]. - - >>> list(Document(tags.html('...'))) - [u'', u'', u'\\t...', u''] - """ - - DOCTYPE = 'html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"' - HTML_XMLNS = 'http://www.w3.org/1999/xhtml' - HTML_LANG = None - - def __init__ (self, root, - doctype=DOCTYPE, - html_xmlns=HTML_XMLNS, - html_lang=HTML_LANG, - xml_version=None, xml_encoding=None, - ) : - # add xmlns attr to root node - self.root = root(xmlns=html_xmlns, lang=html_lang) - - # store - self.doctype = doctype - self.xml_declaration = {} - - if xml_version : - self.xml_declaration['version'] = xml_version - - if xml_encoding : - self.xml_declaration['encoding'] = xml_encoding - - def flatten (self) : - """ - Return the header lines along with the normally formatted tag - """ - - if self.xml_declaration : - yield 0, u'' % (' '.join('%s="%s"' % kv for kv in self.xml_declaration.iteritems())) - - if self.doctype : - yield 0, u'' % (self.doctype) - - # - for indent, line in self.root.flatten() : - yield indent, line - -class TagFactory (object) : - """ - Build Tags with names give as attribute names - - >>> list(TagFactory().a(href='#')('Yay')) - [u'', u'\\tYay', u''] - - >>> list(TagFactory()("><")) - [u'><'] - """ - - # full XHTML document - document = Document - - def __getattr__ (self, name) : - """ - Get a Tag object with the given name, but no contents - - >>> TagFactory().a - tag('a') - """ - - return Tag(name) - - def __call__ (self, *values) : - """ - Raw HTML. - """ - - return Text(*values) - -class HTML5TagFactory (TagFactory) : - span = Tag('span', selfclosing=False) - -# static instance -tags = TagFactory() -html5 = HTML5TagFactory() - -# testing -if __name__ == '__main__' : - import doctest - - doctest.testmod() -