--- a/pvl/web/html.py Tue Feb 24 12:47:09 2015 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,641 +0,0 @@
-"""
- Generate XHTML output from python code.
-
- >>> from html import tags
- >>> unicode(tags.a(href="http://www.google.com")("Google <this>!"))
- u'<a href="http://www.google.com">\\n\\tGoogle <this>!\\n</a>'
-"""
-
-# XXX: needs some refactoring for Text vs Tag now
-# XXX: not all tags work in self-closing form, e.g. empty html.title() breaks badly
-
-import itertools as itertools
-import types as types
-from xml.sax import saxutils
-
-class Renderable (object) :
- """
- Structured data that's flattened into indented lines of text.
- """
-
- # types of nested items to flatten
- CONTAINER_TYPES = (types.TupleType, types.ListType, types.GeneratorType)
-
- @classmethod
- def process_contents (cls, *args) :
- """
- Yield the HTML tag's contents from the given sequence of positional arguments as a series of flattened
- items, eagerly converting them to unicode.
-
- If no arguments are given, we don't have any children:
-
- >>> bool(list(Tag.process_contents()))
- False
-
- Items that are None will be ignored:
-
- >>> list(Tag.process_contents(None))
- []
-
- Various Python container types are recursively flattened:
-
- >>> list(Tag.process_contents([1, 2]))
- [u'1', u'2']
- >>> list(Tag.process_contents([1], [2]))
- [u'1', u'2']
- >>> list(Tag.process_contents([1, [2]]))
- [u'1', u'2']
- >>> list(Tag.process_contents(n + 1 for n in xrange(2)))
- [u'1', u'2']
- >>> list(Tag.process_contents((1, 2)))
- [u'1', u'2']
- >>> list(Tag.process_contents((1), (2, )))
- [u'1', u'2']
-
- Our own HTML-aware objects are returned as-is:
-
- >>> list(Tag.process_contents(Tag.build('foo')))
- [tag('foo')]
- >>> list(Tag.process_contents(Text(u'bar')))
- [Text(u'bar')]
-
- All other objects are converted to unicode:
-
- >>> list(Tag.process_contents('foo', u'bar', 0.123, False))
- [u'foo', u'bar', u'0.123', u'False']
-
- """
-
- for arg in args :
- if arg is None :
- # skip null: None
- continue
-
- elif isinstance(arg, cls.CONTAINER_TYPES) :
- # flatten nested container: tuple/list/generator
- for node in arg :
- # recurse
- for item in cls.process_contents(node) :
- yield item
-
- elif isinstance(arg, Renderable) :
- # yield item: Renderable
- yield arg
-
- else :
- # as unicode
- yield unicode(arg)
-
-
- def flatten (self) :
- """
- Flatten this object into a series of (identlevel, line) tuples.
- """
-
- raise NotImplementedError()
-
- def iter (self, indent='\t') :
- """
- Yield a series of lines for this render.
- """
-
- for indent_level, line in self.flatten() :
- yield (indent * indent_level) + line
-
- def unicode (self, newline=u'\n', **opts) :
- """
- Render as a single unicode string.
-
- No newline is returned at the end of the string.
-
- >>> Tag.build('a', 'b').unicode(newline='X', indent='Y')
- u'<a>XYbX</a>'
- """
-
- return newline.join(self.iter(**opts))
-
- # required for print
- def str (self, newline='\n', encoding='ascii', **opts) :
- """
- Render as a single string.
- """
-
- # XXX: try and render as non-unicode, i.e. binary data in the tree?
- return newline.join(line.encode(encoding) for line in self.iter(**opts))
-
- # formal interface using defaults
- __iter__ = iter
- __unicode__ = unicode
- __str__ = str
-
-class Text (Renderable) :
- """
- Plain un-structured/un-processed HTML text for output
-
- >>> Text(u'foo')
- Text(u'foo')
- >>> list(Text('<foo>'))
- [u'<foo>']
- >>> list(Text('<foo>', tag('p', 'test')))
- [u'<foo>', u'<p>', u'\\ttest', u'</p>']
- >>> list(tag('a', Text('<foo>')))
- [u'<a>', u'\\t<foo>', u'</a>']
- >>> list(Text(range(2)))
- [u'0', u'1']
-
- """
-
- def __init__ (self, *contents) :
- self.contents = self.process_contents(*contents)
-
- def flatten (self, indent=0) :
- for item in self.contents :
- if isinstance(item, Renderable) :
- # recursively flatten items
- for line_indent, line in item.flatten() :
- # indented
- yield indent + line_indent, line
-
- else :
- # render raw value
- yield indent, unicode(item)
-
- def __repr__ (self) :
- return "Text(%s)" % (', '.join(repr(item) for item in self.contents))
-
-class Tag (Renderable) :
- """
- An immutable HTML tag structure, with the tag's name, attributes and contents.
- """
-
- @classmethod
- def process_attrs (cls, **kwargs) :
- """
- Yield the HTML tag attributes from the given set of keyword arguments as a series of (name, value) tuples.
-
- Keyword-only options (`_key=value`) are filtered out:
-
- >>> dict(Tag.process_attrs(_opt=True))
- {}
-
- Attributes with a value of None/False are filtered out:
-
- >>> dict(Tag.process_attrs(foo=None, bar=False))
- {}
-
- A value given as True is returned as the key's value:
-
- >>> dict(Tag.process_attrs(quux=True))
- {'quux': u'quux'}
-
- A (single) trailing underscore in the attribute name is removed:
-
- >>> dict(Tag.process_attrs(class_='foo'))
- {'class': u'foo'}
- >>> dict(Tag.process_attrs(data__='foo'))
- {'data_': u'foo'}
- """
-
- for key, value in kwargs.iteritems() :
- # keyword arguments are always pure strings
- assert type(key) is str
-
- if value is None or value is False:
- # omit
- continue
-
- if key.startswith('_') :
- # option
- continue
-
- if key.endswith('_') :
- # strip underscore
- key = key[:-1]
-
- if '_' in key :
- key = key.replace('_', '-')
-
- if value is True :
- # flag attr
- value = key
-
- yield key, unicode(value)
-
- @classmethod
- def process_opts (cls, **kwargs) :
- """
- Return a series of of the keyword-only _options, extracted from the given dict of keyword arguments, as
- (k, v) tuples.
-
- >>> Tag.process_opts(foo='bar', _bar=False)
- (('bar', False),)
- """
-
- return tuple((k.lstrip('_'), v) for k, v in kwargs.iteritems() if k.startswith('_'))
-
- @classmethod
- def build (cls, _name, *args, **kwargs) :
- """
- Factory function for constructing Tags by directly passing in contents/attributes/options as Python function
- arguments/keyword arguments.
-
- The first positional argument is the tag's name:
-
- >>> Tag.build('foo')
- tag('foo')
-
- Further positional arguments are the tag's contents:
-
- >>> Tag.build('foo', 'quux', 'bar')
- tag('foo', u'quux', u'bar')
-
- All the rules used by process_contents() are available:
-
- >>> Tag.build('foo', [1, None], None, (n for n in xrange(2)))
- tag('foo', u'1', u'0', u'1')
-
- The special-case for a genexp as the only argument works:
-
- >>> f = lambda *args: Tag.build('foo', *args)
- >>> f('hi' for n in xrange(2))
- tag('foo', u'hi', u'hi')
-
- Attributes are passed as keyword arguments, with or without contents:
-
- >>> Tag.build('foo', id=1)
- tag('foo', id=u'1')
- >>> Tag.build('foo', 'quux', bar=5)
- tag('foo', u'quux', bar=u'5')
- >>> Tag.build('foo', class_='ten')
- tag('foo', class=u'ten')
-
- The attribute names don't conflict with positional argument names:
-
- >>> Tag.build('bar', name='foo')
- tag('bar', name=u'foo')
-
- Options are handled as the 'real' keyword arguments:
-
- >>> print Tag.build('foo', _selfclosing=False)
- <foo></foo>
- >>> print Tag.build('foo', _foo='bar')
- Traceback (most recent call last):
- ...
- TypeError: __init__() got an unexpected keyword argument 'foo'
- """
-
- # pre-process incoming user values
- contents = list(cls.process_contents(*args))
- attrs = dict(cls.process_attrs(**kwargs))
-
- # XXX: use Python 2.6 keyword-only arguments instead?
- options = dict(cls.process_opts(**kwargs))
-
- return cls(_name, contents, attrs, **options)
-
- def __init__ (self, name, contents=None, attrs=None, selfclosing=None, whitespace_sensitive=None, escape=True) :
- """
- Initialize internal Tag state with the given tag identifier, flattened list of content items, dict of
- attributes and dict of options.
-
- selfclosing - set to False to render empty tags as <foo></foo> instead of <foo />
- (for XHTML -> HTML compatibility)
-
- whitespace_sensitive - do not indent tag content onto separate rows, render the full tag as a single
- row
-
- escape - html-escape non-Renderable's (text)
-
- Use the build() factory function to build Tag objects using Python's function call argument semantics.
-
- The tag name is used a pure string identifier:
-
- >>> Tag(u'foo', [], {})
- tag('foo')
- >>> Tag(u'\\xE4', [], {})
- Traceback (most recent call last):
- ...
- UnicodeEncodeError: 'ascii' codec can't encode character u'\\xe4' in position 0: ordinal not in range(128)
-
- Contents have their order preserved:
-
- >>> Tag('foo', [1, 2], {})
- tag('foo', 1, 2)
- >>> Tag('foo', [2, 1], {})
- tag('foo', 2, 1)
-
- Attributes can be given:
-
- >>> Tag('foo', [], dict(foo='bar'))
- tag('foo', foo='bar')
-
- Options can be given:
-
- >>> print Tag('foo', [], {}, selfclosing=False)
- <foo></foo>
- """
-
- self.name = str(name)
- self.contents = contents or []
- self.attrs = attrs or {}
-
- # options
- self.selfclosing = selfclosing
- self.whitespace_sensitive = whitespace_sensitive
- self.escape = escape
-
- def __call__ (self, *args, **kwargs) :
- """
- Return a new Tag as a copy of this tag, but with the given additional attributes/contents.
-
- The same rules for function positional/keyword arguments apply as for build()
-
- >>> Tag.build('foo')('bar')
- tag('foo', u'bar')
- >>> Tag.build('a', href='index.html')("Home")
- tag('a', u'Home', href=u'index.html')
-
- New contents and attributes can be given freely, using the same rules as for Tag.build:
-
- >>> Tag.build('bar', None)(5, foo=None, class_='bar')
- tag('bar', u'5', class=u'bar')
-
- Tag contents accumulate in order:
-
- >>> Tag.build('a')('b', ['c'])('d')
- tag('a', u'b', u'c', u'd')
-
- Each Tag is immutable, so the called Tag isn't changed, but rather a copy is returned:
-
- >>> t1 = Tag.build('a'); t2 = t1('b'); t1
- tag('a')
-
- Attribute values are replaced:
-
- >>> Tag.build('foo', a=2)(a=3)
- tag('foo', a=u'3')
-
- Options are also supported:
-
- >>> list(Tag.build('foo')(bar='quux', _selfclosing=False))
- [u'<foo bar="quux"></foo>']
- """
-
- # accumulate contents
- contents = self.contents + list(self.process_contents(*args))
-
- # merge attrs
- attrs = dict(self.attrs)
- attrs.update(self.process_attrs(**kwargs))
-
- # options
- opts = dict(
- selfclosing = self.selfclosing,
- whitespace_sensitive = self.whitespace_sensitive,
- )
- opts.update(self.process_opts(**kwargs))
-
- # build updated tag
- return Tag(self.name, contents, attrs, **opts)
-
- def render_attrs (self) :
- """
- Return the HTML attributes string
-
- >>> Tag.build('x', foo=5, bar='<', quux=None).render_attrs()
- u'foo="5" bar="<"'
- >>> Tag.build('x', foo='a"b').render_attrs()
- u'foo=\\'a"b\\''
- """
-
- return " ".join(
- (
- u'%s=%s' % (name, saxutils.quoteattr(value))
- ) for name, value in self.attrs.iteritems()
- )
-
- def flatten_items (self, indent=1) :
- """
- Flatten our content into a series of indented lines.
-
- >>> list(Tag.build('tag', 5).flatten_items())
- [(1, u'5')]
- >>> list(Tag.build('tag', 'line1', 'line2').flatten_items())
- [(1, u'line1'), (1, u'line2')]
-
- Nested :
- >>> list(Tag.build('tag', 'a', Tag.build('b', 'bb'), 'c').flatten_items())
- [(1, u'a'), (1, u'<b>'), (2, u'bb'), (1, u'</b>'), (1, u'c')]
- >>> list(Tag.build('tag', Tag.build('hr'), Tag.build('foo')('bar')).flatten_items())
- [(1, u'<hr />'), (1, u'<foo>'), (2, u'bar'), (1, u'</foo>')]
- """
-
- for item in self.contents :
- if isinstance(item, Renderable) :
- # recursively flatten items
- for line_indent, line in item.flatten() :
- # indented
- yield indent + line_indent, line
-
- elif self.escape :
- # render HTML-escaped raw value
- # escape raw values
- yield indent, saxutils.escape(item)
-
- else :
- # render raw value
- yield indent, unicode(item)
-
- def flatten (self) :
- """
- Render the tag and all content as a flattened series of indented lines.
-
- Empty tags collapse per default:
-
- >>> list(Tag.build('foo').flatten())
- [(0, u'<foo />')]
- >>> list(Tag.build('bar', id=5).flatten())
- [(0, u'<bar id="5" />')]
-
- Values are indented inside the start tag:
-
- >>> list(Tag.build('foo', 'bar', a=5).flatten())
- [(0, u'<foo a="5">'), (1, u'bar'), (0, u'</foo>')]
-
- Nested tags are further indented:
-
- >>> list(Tag.build('1', '1.1', Tag.build('1.2', '1.2.1'), '1.3', a=5).flatten())
- [(0, u'<1 a="5">'), (1, u'1.1'), (1, u'<1.2>'), (2, u'1.2.1'), (1, u'</1.2>'), (1, u'1.3'), (0, u'</1>')]
-
- Empty tags are rendered with a separate closing tag on the same line, if desired:
-
- >>> list(Tag.build('foo', _selfclosing=False).flatten())
- [(0, u'<foo></foo>')]
- >>> list(Tag.build('foo', src='asdf', _selfclosing=False).flatten())
- [(0, u'<foo src="asdf"></foo>')]
-
- Tags that are declared as whitespace-sensitive are collapsed onto the same line:
-
- >>> list(Tag.build('foo', _whitespace_sensitive=True).flatten())
- [(0, u'<foo />')]
- >>> list(Tag.build('foo', _whitespace_sensitive=True, _selfclosing=False).flatten())
- [(0, u'<foo></foo>')]
- >>> list(Tag.build('foo', 'bar', _whitespace_sensitive=True).flatten())
- [(0, u'<foo>bar</foo>')]
- >>> list(Tag.build('foo', 'bar\\nasdf\\tx', _whitespace_sensitive=True).flatten())
- [(0, u'<foo>bar\\nasdf\\tx</foo>')]
- >>> list(Tag.build('foo', 'bar', Tag.build('quux', 'asdf'), 'asdf', _whitespace_sensitive=True).flatten())
- [(0, u'<foo>bar<quux>asdf</quux>asdf</foo>')]
-
- Embedded HTML given as string values is escaped:
-
- >>> list(Tag.build('foo', '<asdf>'))
- [u'<foo>', u'\\t<asdf>', u'</foo>']
-
- Embedded quotes in attribute values are esacaped:
-
- >>> list(Tag.build('foo', style='ok;" onload="...'))
- [u'<foo style=\\'ok;" onload="...\\' />']
- >>> list(Tag.build('foo', style='ok;\\'" onload=..."\\''))
- [u'<foo style="ok;\\'" onload=..."\\'" />']
- """
-
- # optional attr spec
- if self.attrs :
- attrs = " " + self.render_attrs()
-
- else :
- attrs = ""
-
- if not self.contents and self.selfclosing is False :
- # empty tag, but don't use the self-closing syntax..
- yield 0, u"<%s%s></%s>" % (self.name, attrs, self.name)
-
- elif not self.contents :
- # self-closing xml tag
- # do note that this is invalid HTML, and the space before the / is relevant for parsing it as HTML
- yield 0, u"<%s%s />" % (self.name, attrs)
-
- elif self.whitespace_sensitive :
- # join together each line for each child, discarding the indent
- content = u''.join(line for indent, line in self.flatten_items())
-
- # render full tag on a single line
- yield 0, u"<%s%s>%s</%s>" % (self.name, attrs, content, self.name)
-
- else :
- # start tag
- yield 0, u"<%s%s>" % (self.name, attrs)
-
- # contents, indented one level below the start tag
- for indent, line in self.flatten_items(indent=1) :
- yield indent, line
-
- # close tag
- yield 0, u"</%s>" % (self.name, )
-
- def __repr__ (self) :
- return 'tag(%s)' % ', '.join(
- [
- repr(self.name)
- ] + [
- repr(c) for c in self.contents
- ] + [
- '%s=%r' % (name, value) for name, value in self.attrs.iteritems()
- ]
- )
-
-# factory function for Tag
-tag = Tag.build
-
-
-class Document (Renderable) :
- """
- A full XHTML 1.0 document with optional XML header, doctype, html[@xmlns].
-
- >>> list(Document(tags.html('...')))
- [u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', u'<html xmlns="http://www.w3.org/1999/xhtml">', u'\\t...', u'</html>']
- """
-
- DOCTYPE = 'html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"'
- HTML_XMLNS = 'http://www.w3.org/1999/xhtml'
- HTML_LANG = None
-
- def __init__ (self, root,
- doctype=DOCTYPE,
- html_xmlns=HTML_XMLNS,
- html_lang=HTML_LANG,
- xml_version=None, xml_encoding=None,
- ) :
- # add xmlns attr to root node
- self.root = root(xmlns=html_xmlns, lang=html_lang)
-
- # store
- self.doctype = doctype
- self.xml_declaration = {}
-
- if xml_version :
- self.xml_declaration['version'] = xml_version
-
- if xml_encoding :
- self.xml_declaration['encoding'] = xml_encoding
-
- def flatten (self) :
- """
- Return the header lines along with the normally formatted <html> tag
- """
-
- if self.xml_declaration :
- yield 0, u'<?xml %s ?>' % (' '.join('%s="%s"' % kv for kv in self.xml_declaration.iteritems()))
-
- if self.doctype :
- yield 0, u'<!DOCTYPE %s>' % (self.doctype)
-
- # <html>
- for indent, line in self.root.flatten() :
- yield indent, line
-
-class TagFactory (object) :
- """
- Build Tags with names give as attribute names
-
- >>> list(TagFactory().a(href='#')('Yay'))
- [u'<a href="#">', u'\\tYay', u'</a>']
-
- >>> list(TagFactory()("><"))
- [u'><']
- """
-
- # full XHTML document
- document = Document
-
- def __getattr__ (self, name) :
- """
- Get a Tag object with the given name, but no contents
-
- >>> TagFactory().a
- tag('a')
- """
-
- return Tag(name)
-
- def __call__ (self, *values) :
- """
- Raw HTML.
- """
-
- return Text(*values)
-
-class HTML5TagFactory (TagFactory) :
- span = Tag('span', selfclosing=False)
-
-# static instance
-tags = TagFactory()
-html5 = HTML5TagFactory()
-
-# testing
-if __name__ == '__main__' :
- import doctest
-
- doctest.testmod()
-