terom@0: """
terom@60: Generate XHTML output from python code.
terom@0:
terom@0: >>> from html import tags
terom@60: >>> unicode(tags.a(href="http://www.google.com")("Google <this>!"))
terom@60: u'<a href="http://www.google.com">\\n\\tGoogle &lt;this&gt;!\\n</a>'
terom@0: """
terom@0:
terom@60: import itertools as itertools
terom@60: import types as types
terom@60: from xml.sax import saxutils
terom@0:
class Renderable(object):
    """
    Base class for structured data that renders as indented lines of text.

    Subclasses implement flatten(); the other methods turn the flattened
    (indent level, line) pairs into individual lines or a single string.
    """

    def flatten(self):
        """
        Produce this object as a series of (indent level, line) tuples.

        The base implementation always raises NotImplementedError.
        """

        raise NotImplementedError()

    def iter(self, indent='\t'):
        """
        Yield each flattened line, prefixed with `indent` repeated once per indent level.
        """

        for depth, text in self.flatten():
            prefix = indent * depth

            yield prefix + text

    def unicode(self, newline=u'\n', **opts):
        """
        Render as a single unicode string, joining the lines with `newline`.

        Nothing is appended after the last line. Any further keyword options are passed on to iter().

        >>> class Lines (Renderable) :
        ...     def flatten (self) :
        ...         yield 0, u'a'
        ...         yield 1, u'b'
        >>> Lines().unicode(newline='X', indent='Y')
        u'aXYb'
        """

        rendered_lines = self.iter(**opts)

        return newline.join(rendered_lines)

    # required for print
    def str(self, newline='\n', encoding='ascii', **opts):
        """
        Render as a single string, encoding every line with `encoding` before joining with `newline`.

        Any further keyword options are passed on to iter().
        """

        # XXX: try and render as non-unicode, i.e. binary data in the tree?
        encoded_lines = [text.encode(encoding) for text in self.iter(**opts)]

        return newline.join(encoded_lines)

    # special-method aliases; these run the methods above with their default arguments
    __iter__ = iter
    __unicode__ = unicode
    __str__ = str
terom@0:
class Text(Renderable):
    """
    Plain text that is output as-is: flatten() yields the stored value as one line, without any processing.

    >>> Text('foo')
    Text(u'foo')
    >>> list(Text('<b>bold</b>'))
    [u'<b>bold</b>']
    >>> list(tag('a', Text('<b>')).flatten_items())
    [(1, u'<b>')]
    """

    def __init__(self, text):
        # the value is always held as unicode, whatever type it was given as
        as_unicode = unicode(text)

        self.text = as_unicode

    def flatten(self):
        # a single line, at indent level zero
        row = (0, self.text)

        yield row

    def __repr__(self):
        args = (self.text, )

        return "Text(%r)" % args
terom@60:
class Tag(Renderable):
    """
    An immutable HTML tag structure, with the tag's name, attributes and contents.

    Tags are normally constructed using the build() factory (or by calling an existing Tag), which
    pre-processes plain Python values into the flattened contents/attributes that __init__ expects.
    """

    # types of nested items to flatten
    # (tuple/list are the very same objects as the legacy types.TupleType/types.ListType aliases)
    CONTAINER_TYPES = (tuple, list, types.GeneratorType)

    # types of items to keep as-is
    ITEM_TYPES = (Renderable, )

    @classmethod
    def process_contents(cls, *args):
        """
        Yield the HTML tag's contents from the given sequence of positional arguments as a series of flattened
        items, eagerly converting them to unicode.

        If no arguments are given, we don't have any children:

        >>> bool(list(Tag.process_contents()))
        False

        Items that are None will be ignored:

        >>> list(Tag.process_contents(None))
        []

        Various Python container types are recursively flattened:

        >>> list(Tag.process_contents([1, 2]))
        [u'1', u'2']
        >>> list(Tag.process_contents([1], [2]))
        [u'1', u'2']
        >>> list(Tag.process_contents([1, [2]]))
        [u'1', u'2']
        >>> list(Tag.process_contents(n + 1 for n in xrange(2)))
        [u'1', u'2']
        >>> list(Tag.process_contents((1, 2)))
        [u'1', u'2']
        >>> list(Tag.process_contents((1), (2, )))
        [u'1', u'2']

        Our own HTML-aware objects are returned as-is:

        >>> list(Tag.process_contents(Tag.build('foo')))
        [tag('foo')]
        >>> list(Tag.process_contents(Text('bar')))
        [Text(u'bar')]

        All other objects are converted to unicode:

        >>> list(Tag.process_contents('foo', u'bar', 0.123, False))
        [u'foo', u'bar', u'0.123', u'False']

        """

        for arg in args:
            if arg is None:
                # skip null: None
                continue

            elif isinstance(arg, cls.CONTAINER_TYPES):
                # flatten nested container: tuple/list/generator
                for node in arg:
                    # recurse
                    for item in cls.process_contents(node):
                        yield item

            elif isinstance(arg, cls.ITEM_TYPES):
                # yield item: Renderable
                yield arg

            else:
                # as unicode
                yield unicode(arg)

    @classmethod
    def process_attrs(cls, **kwargs):
        """
        Yield the HTML tag attributes from the given set of keyword arguments as a series of (name, value) tuples.

        Keyword-only options (`_key=value`) are filtered out:

        >>> dict(Tag.process_attrs(_opt=True))
        {}

        Attributes with a value of None/False are filtered out:

        >>> dict(Tag.process_attrs(foo=None, bar=False))
        {}

        A value given as True is returned as the key's value:

        >>> dict(Tag.process_attrs(quux=True))
        {'quux': u'quux'}

        A (single) trailing underscore in the attribute name is removed:

        >>> dict(Tag.process_attrs(class_='foo'))
        {'class': u'foo'}
        >>> dict(Tag.process_attrs(data__='foo'))
        {'data_': u'foo'}
        """

        for key, value in kwargs.iteritems():
            # keyword arguments are always pure strings
            assert type(key) is str

            if value is None or value is False:
                # omit
                continue

            if key.startswith('_'):
                # option
                continue

            if key.endswith('_'):
                # strip underscore
                key = key[:-1]

            if value is True:
                # flag attr
                value = key

            yield key, unicode(value)

    @classmethod
    def process_opts(cls, **kwargs):
        """
        Return a series of the keyword-only _options, extracted from the given dict of keyword arguments, as
        (k, v) tuples. The leading underscores are removed from the option names.

        >>> Tag.process_opts(foo='bar', _bar=False)
        (('bar', False),)
        """

        return tuple((k.lstrip('_'), v) for k, v in kwargs.iteritems() if k.startswith('_'))

    @classmethod
    def build(cls, _name, *args, **kwargs):
        """
        Factory function for constructing Tags by directly passing in contents/attributes/options as Python function
        arguments/keyword arguments.

        The first positional argument is the tag's name:

        >>> Tag.build('foo')
        tag('foo')

        Further positional arguments are the tag's contents:

        >>> Tag.build('foo', 'quux', 'bar')
        tag('foo', u'quux', u'bar')

        All the rules used by process_contents() are available:

        >>> Tag.build('foo', [1, None], None, (n for n in xrange(2)))
        tag('foo', u'1', u'0', u'1')

        The special-case for a genexp as the only argument works:

        >>> f = lambda *args: Tag.build('foo', *args)
        >>> f('hi' for n in xrange(2))
        tag('foo', u'hi', u'hi')

        Attributes are passed as keyword arguments, with or without contents:

        >>> Tag.build('foo', id=1)
        tag('foo', id=u'1')
        >>> Tag.build('foo', 'quux', bar=5)
        tag('foo', u'quux', bar=u'5')
        >>> Tag.build('foo', class_='ten')
        tag('foo', class=u'ten')

        The attribute names don't conflict with positional argument names:

        >>> Tag.build('bar', name='foo')
        tag('bar', name=u'foo')

        Options are handled as the 'real' keyword arguments:

        >>> print Tag.build('foo', _selfclosing=False)
        <foo></foo>
        >>> print Tag.build('foo', _foo='bar')
        Traceback (most recent call last):
            ...
        TypeError: __init__() got an unexpected keyword argument 'foo'
        """

        # pre-process incoming user values
        contents = list(cls.process_contents(*args))
        attrs = dict(cls.process_attrs(**kwargs))

        # XXX: use Python 2.6 keyword-only arguments instead?
        options = dict(cls.process_opts(**kwargs))

        return cls(_name, contents, attrs, **options)

    def __init__(self, name, contents, attrs, selfclosing=None, whitespace_sensitive=None):
        """
        Initialize internal Tag state with the given tag identifier, flattened list of content items, dict of
        attributes and dict of options.

            selfclosing             - set to False to render empty tags as <foo></foo> instead of <foo />
                                      (for XHTML -> HTML compatibility)

            whitespace_sensitive    - do not indent tag content onto separate rows, render the full tag as a single
                                      row

        Use the build() factory function to build Tag objects using Python's function call argument semantics.

        The tag name is used a pure string identifier:

        >>> Tag(u'foo', [], {})
        tag('foo')
        >>> Tag(u'\\xE4', [], {})
        Traceback (most recent call last):
            ...
        UnicodeEncodeError: 'ascii' codec can't encode character u'\\xe4' in position 0: ordinal not in range(128)

        Contents have their order preserved:

        >>> Tag('foo', [1, 2], {})
        tag('foo', 1, 2)
        >>> Tag('foo', [2, 1], {})
        tag('foo', 2, 1)

        Attributes can be given:

        >>> Tag('foo', [], dict(foo='bar'))
        tag('foo', foo='bar')

        Options can be given:

        >>> print Tag('foo', [], {}, selfclosing=False)
        <foo></foo>
        """

        self.name = str(name)
        self.contents = contents
        self.attrs = attrs

        # options
        self.selfclosing = selfclosing
        self.whitespace_sensitive = whitespace_sensitive

    def __call__(self, *args, **kwargs):
        """
        Return a new Tag as a copy of this tag, but with the given additional attributes/contents.

        The same rules for function positional/keyword arguments apply as for build()

        >>> Tag.build('foo')('bar')
        tag('foo', u'bar')
        >>> Tag.build('a', href='index.html')("Home")
        tag('a', u'Home', href=u'index.html')

        New contents and attributes can be given freely, using the same rules as for Tag.build:

        >>> Tag.build('bar', None)(5, foo=None, class_='bar')
        tag('bar', u'5', class=u'bar')

        Tag contents accumulate in order:

        >>> Tag.build('a')('b', ['c'])('d')
        tag('a', u'b', u'c', u'd')

        Each Tag is immutable, so the called Tag isn't changed, but rather a copy is returned:

        >>> t1 = Tag.build('a'); t2 = t1('b'); t1
        tag('a')

        Attribute values are replaced:

        >>> Tag.build('foo', a=2)(a=3)
        tag('foo', a=u'3')

        Options are also supported:

        >>> list(Tag.build('foo')(bar='quux', _selfclosing=False))
        [u'<foo bar="quux"></foo>']
        """

        # accumulate contents
        contents = self.contents + list(self.process_contents(*args))

        # merge attrs
        attrs = dict(self.attrs)
        attrs.update(self.process_attrs(**kwargs))

        # options
        opts = dict(
            selfclosing = self.selfclosing,
            whitespace_sensitive = self.whitespace_sensitive,
        )
        opts.update(self.process_opts(**kwargs))

        # build updated tag
        return Tag(self.name, contents, attrs, **opts)

    def render_attrs(self):
        """
        Return the HTML attributes string, with each value quoted and escaped for use as an attribute value.

        >>> Tag.build('x', foo=5, bar='<', quux=None).render_attrs()
        u'foo="5" bar="&lt;"'
        >>> Tag.build('x', foo='a"b').render_attrs()
        u'foo=\\'a"b\\''
        """

        return " ".join(
            (
                u'%s=%s' % (name, saxutils.quoteattr(value))
            ) for name, value in self.attrs.iteritems()
        )

    def flatten_items(self, indent=1):
        """
        Flatten our content into a series of indented lines, as (indent level, line) tuples.

        Nested Renderable items are flattened recursively with their own indent levels offset by `indent`; all
        other (raw value) items are HTML-escaped and yielded at the `indent` level.

        >>> list(Tag.build('tag', 5).flatten_items())
        [(1, u'5')]
        >>> list(Tag.build('tag', 'line1', 'line2').flatten_items())
        [(1, u'line1'), (1, u'line2')]

        Nested :
        >>> list(Tag.build('tag', 'a', Tag.build('b', 'bb'), 'c').flatten_items())
        [(1, u'a'), (1, u'<b>'), (2, u'bb'), (1, u'</b>'), (1, u'c')]
        >>> list(Tag.build('tag', Tag.build('hr'), Tag.build('foo')('bar')).flatten_items())
        [(1, u'<hr />'), (1, u'<foo>'), (2, u'bar'), (1, u'</foo>')]
        """

        for item in self.contents:
            if isinstance(item, Renderable):
                # recursively flatten items
                for line_indent, line in item.flatten():
                    # indented
                    yield indent + line_indent, line

            else:
                # render HTML-escaped raw value
                yield indent, saxutils.escape(item)

    def flatten(self):
        """
        Render the tag and all content as a flattened series of indented lines.

        Empty tags collapse per default:

        >>> list(Tag.build('foo').flatten())
        [(0, u'<foo />')]
        >>> list(Tag.build('bar', id=5).flatten())
        [(0, u'<bar id="5" />')]

        Values are indented inside the start tag:

        >>> list(Tag.build('foo', 'bar', a=5).flatten())
        [(0, u'<foo a="5">'), (1, u'bar'), (0, u'</foo>')]

        Nested tags are further indented:

        >>> list(Tag.build('1', '1.1', Tag.build('1.2', '1.2.1'), '1.3', a=5).flatten())
        [(0, u'<1 a="5">'), (1, u'1.1'), (1, u'<1.2>'), (2, u'1.2.1'), (1, u'</1.2>'), (1, u'1.3'), (0, u'</1>')]

        Empty tags are rendered with a separate closing tag on the same line, if desired:

        >>> list(Tag.build('foo', _selfclosing=False).flatten())
        [(0, u'<foo></foo>')]
        >>> list(Tag.build('foo', src='asdf', _selfclosing=False).flatten())
        [(0, u'<foo src="asdf"></foo>')]

        Tags that are declared as whitespace-sensitive are collapsed onto the same line:

        >>> list(Tag.build('foo', _whitespace_sensitive=True).flatten())
        [(0, u'<foo />')]
        >>> list(Tag.build('foo', _whitespace_sensitive=True, _selfclosing=False).flatten())
        [(0, u'<foo></foo>')]
        >>> list(Tag.build('foo', 'bar', _whitespace_sensitive=True).flatten())
        [(0, u'<foo>bar</foo>')]
        >>> list(Tag.build('foo', 'bar\\nasdf\\tx', _whitespace_sensitive=True).flatten())
        [(0, u'<foo>bar\\nasdf\\tx</foo>')]
        >>> list(Tag.build('foo', 'bar', Tag.build('quux', 'asdf'), 'asdf', _whitespace_sensitive=True).flatten())
        [(0, u'<foo>bar<quux>asdf</quux>asdf</foo>')]

        Embedded HTML given as string values is escaped:

        >>> list(Tag.build('foo', '<asdf>'))
        [u'<foo>', u'\\t&lt;asdf&gt;', u'</foo>']

        Embedded quotes in attribute values are escaped:

        >>> list(Tag.build('foo', style='ok;" onload="...'))
        [u'<foo style=\\'ok;" onload="...\\' />']
        """

        # optional attr spec
        if self.attrs:
            attrs = " " + self.render_attrs()

        else:
            attrs = ""

        if not self.contents and self.selfclosing is False:
            # empty tag, but don't use the self-closing syntax..
            yield 0, u"<%s%s></%s>" % (self.name, attrs, self.name)

        elif not self.contents:
            # self-closing xml tag
            # do note that this is invalid HTML, and the space before the / is relevant for parsing it as HTML
            yield 0, u"<%s%s />" % (self.name, attrs)

        elif self.whitespace_sensitive:
            # join together each line for each child, discarding the indent
            content = u''.join(line for indent, line in self.flatten_items())

            # render full tag on a single line
            yield 0, u"<%s%s>%s</%s>" % (self.name, attrs, content, self.name)

        else:
            # start tag
            yield 0, u"<%s%s>" % (self.name, attrs)

            # contents, indented one level below the start tag
            for indent, line in self.flatten_items(indent=1):
                yield indent, line

            # close tag
            yield 0, u"</%s>" % (self.name, )

    def __repr__(self):
        """
        Return a tag(name, *contents, **attrs) style representation of this tag; options are not included.
        """

        return 'tag(%s)' % ', '.join(
            [
                repr(self.name)
            ] + [
                repr(c) for c in self.contents
            ] + [
                '%s=%r' % (name, value) for name, value in self.attrs.iteritems()
            ]
        )
terom@0:
# module-level shorthand for the Tag.build factory: tag(name, *contents, **attrs)
tag = Tag.build
terom@0:
terom@0:
class Document(Renderable):
    """
    A full XHTML 1.1 document with optional XML declaration, doctype and html[@xmlns].

    >>> list(Document(tags.html))
    [u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', u'<html xmlns="http://www.w3.org/1999/xhtml" />']
    >>> list(Document(tags.html, doctype=None, xml_version='1.0', xml_encoding='utf-8'))
    [u'<?xml version="1.0" encoding="utf-8" ?>', u'<html xmlns="http://www.w3.org/1999/xhtml" />']
    """

    # the XML declaration's pseudo-attributes must appear in this order
    _XML_DECLARATION_ORDER = ('version', 'encoding', 'standalone')

    def __init__(self, root,
        doctype='html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"',
        html_xmlns='http://www.w3.org/1999/xhtml',
        xml_version=None, xml_encoding=None,
    ):
        """
            root            - the root tag; it is called with xmlns=html_xmlns to get the tag that is rendered
            doctype         - contents of the <!DOCTYPE ...> line; no doctype line is output for a false value
            html_xmlns      - value for the root tag's xmlns attribute
            xml_version     - version="..." for the XML declaration, if given
            xml_encoding    - encoding="..." for the XML declaration, if given

        The XML declaration line is only output if xml_version and/or xml_encoding is given.
        """

        # add xmlns attr to root node
        self.root = root(xmlns=html_xmlns)

        # store
        self.doctype = doctype
        self.xml_declaration = {}

        if xml_version:
            self.xml_declaration['version'] = xml_version

        if xml_encoding:
            self.xml_declaration['encoding'] = xml_encoding

    def flatten(self):
        """
        Return the header lines along with the normally formatted tag
        """

        if self.xml_declaration:
            # dict iteration order is arbitrary, so emit the well-known keys in their required order..
            items = [
                (key, self.xml_declaration[key])
                    for key in self._XML_DECLARATION_ORDER if key in self.xml_declaration
            ]

            # ..followed by anything else that was stored in the dict, in a stable order
            items.extend(sorted(
                (key, value)
                    for key, value in self.xml_declaration.items() if key not in self._XML_DECLARATION_ORDER
            ))

            yield 0, u'<?xml %s ?>' % (' '.join('%s="%s"' % kv for kv in items), )

        if self.doctype:
            yield 0, u'<!DOCTYPE %s>' % (self.doctype, )

        # the document itself, at the same indent level as the headers
        for indent, line in self.root.flatten():
            yield indent, line
terom@0:
class TagFactory(object):
    """
    Build Tags with names given as attribute names

    >>> list(TagFactory().a(href='#'))
    [u'<a href="#" />']

    >>> list(TagFactory().raw("><"))
    [u'><']
    """

    # full XHTML document
    document = Document

    # raw HTML
    raw = Text

    def __getattr__(self, name):
        """
        Get a Tag object with the given name, but no contents

        >>> TagFactory().a
        tag('a')

        Special __names__ are not tags. They are looked up here by e.g. copy and pickle when probing for optional
        protocol methods, and answering those with a (callable) Tag would break such protocols:

        >>> hasattr(TagFactory(), '__deepcopy__')
        False
        """

        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)

        return Tag(name, [], {})
terom@0:
# shared module-level factory instance: tags.<name> gives an empty Tag with that name
tags = TagFactory()
terom@0:
# when executed as a script, run the doctests embedded in this module's docstrings
if __name__ == '__main__':
    import doctest

    doctest.testmod()
terom@0: