"""
    Generating XHTML output from nested python objects

    XXX: use a 'real' XML builder?

    To use:

    >>> from html import tags
    >>> str(tags.a(href="")("Google <this>!"))
    '<a href="">\\n\\tGoogle &lt;this&gt;!\\n</a>\\n'
"""

from cgi import escape
import itertools as _itertools, types as _types

class IRenderable (object) :
    """
        Something that's renderable as the contents of a HTML tag.

        This is just used by Container for rendering Tags as actual HTML, vs just plain strs as escaped data.

        Subclasses provide render_raw_lines(); the other render_* methods build on it to add newlines, join the
        lines into a single unicode string, encode that to a str, or write it to a stream/file.
    """

    def render_raw_lines (self, indent=u'\t') :
        """
            Render the indented lines for tag and contents, without newlines

            indent      - the unicode string used for one level of indentation

            Subclasses must override this. The base implementation raises NotImplementedError rather than
            silently returning None, which render_lines() could not iterate over.
        """

        raise NotImplementedError("%s does not implement render_raw_lines" % type(self).__name__)

    def render_lines (self, indent=u'\t', newline=u'\n') :
        """
            Render full output lines with given newlines

            Yields every line from render_raw_lines(indent=indent) with newline appended.

            >>> list(Tag('xx', 'yy').render_lines())
            [u'<xx>\\n', u'\\tyy\\n', u'</xx>\\n']
        """

        for line in self.render_raw_lines(indent=indent) :
            yield line + newline

    def render_unicode (self, **render_opts) :
        """
            Render full tag as a single unicode string

            render_opts are passed through to render_lines() (indent, newline).

            >>> Tag('xx', 'yy').render_unicode()
            u'<xx>\\n\\tyy\\n</xx>\\n'
        """

        # join with a unicode separator so that even empty output is a unicode object
        return u"".join(self.render_lines(**render_opts))

    def render_str (self, encoding='ascii', **render_opts) :
        """
            Render full tag as an encoded string

            The result of render_unicode(**render_opts) is encoded with the given encoding; characters that the
            encoding cannot represent raise UnicodeEncodeError.

            >>> Tag('xx', 'yy').render_str()
            '<xx>\\n\\tyy\\n</xx>\\n'
        """

        return self.render_unicode(**render_opts).encode(encoding)

    def render_out (self, stream, encoding=None, **render_opts) :
        """
            Render output into the given stream, encoding using the given encoding if given.

            stream      - object with a write() method; receives one call per output line
            encoding    - if given, each line is encoded before being written, otherwise it is written as-is

            >>> from StringIO import StringIO; buf = StringIO(); Tag('xx', 'yy').render_out(buf, 'ascii'); buf.getvalue()
            '<xx>\\n\\tyy\\n</xx>\\n'
        """

        for line in self.render_lines(**render_opts) :
            if encoding :
                line = line.encode(encoding)

            stream.write(line)

    def render_file (self, file, encoding=None, **render_opts) :
        """
            Render output to given file, overwriting anything already there

            file        - object providing open_write(encoding), which returns the stream to render into
            encoding    - handed to file.open_write(); the lines themselves are written to the stream un-encoded

            NOTE(review): whether existing contents are really overwritten depends on open_write() - confirm.
        """

        self.render_out(file.open_write(encoding), **render_opts)

    # default output
    __str__ = render_str
    __unicode__ = render_unicode

    # default .render method
    render = render_unicode

class Container (IRenderable) :
    """
        A container holds a sequence of other renderable items.

        This is just used as the superclass for Tag, and just serves to give us useful handling for e.g. generators
        as tag contents (iterate through them instead of repr'ing them).
    """

    @classmethod
    def process_contents (cls, contents) :
        """
            Postprocess contents iterable, yielding the flattened items.

            Items that are None will be omitted from the return value.

            Certain core sequence types will be recognized and flattened for output: tuples, lists, and generators.
            Flattening is recursive, so nested sequences are expanded as well.

            >>> list(Container.process_contents([]))
            []
            >>> list(Container.process_contents([None]))
            []
            >>> list(Container.process_contents([u'foo']))
            [u'foo']
        """

        for content in contents :
            if content is None :
                # nothing to output for this item
                continue

            # Hardcoded list of special-case nested contents
            if isinstance(content, (tuple, list, _types.GeneratorType)) :
                for subcontent in cls.process_contents(content) :
                    yield subcontent

            else :
                # normal, handle as IRenderable/unicode data
                yield content

    def __init__ (self, *contents) :
        """
            Construct this container with the given sub-items
        """

        # store postprocessed
        self.contents = list(self.process_contents(contents))

    def render_raw_lines (self, **render_opts) :
        """
            Render our contents as a series of non-indented lines, with the contents handling indentation themselves.

            IRenderable items render their own lines (render_opts are passed on to them); anything else is
            converted to unicode and HTML-escaped, giving one line per item.

            >>> list(Container(5).render_raw_lines())
            [u'5']
            >>> list(Container('line1', 'line2').render_raw_lines())
            [u'line1', u'line2']
            >>> list(Container('a', Tag('b', 'bb'), 'c').render_raw_lines())
            [u'a', u'<b>', u'\\tbb', u'</b>', u'c']
            >>> list(Container(Tag('hr'), Tag('foo')('bar')).render_raw_lines())
            [u'<hr />', u'<foo>', u'\\tbar', u'</foo>']
        """

        for content in self.contents :
            if isinstance(content, IRenderable) :
                # sub-items
                for line in content.render_raw_lines(**render_opts) :
                    yield line

            else :
                # escape raw values
                yield escape(unicode(content))

    def __repr__ (self) :
        return 'Container(%s)' % ', '.join(repr(c) for c in self.contents)

class Tag (Container) :
    """
        A HTML tag, with attributes and contents, which can be a mixture of data and other renderables (tags).

        This is the core object, and the ~only one you really need to pay attention to.

        Provides various kinds of rendering output via IRenderable.

        Attributes whose name starts with an underscore are rendering options rather than HTML attributes:

            _selfclosing            - set to False to render <x></x> instead of <x /> when there are no contents
            _whitespace_sensitive   - if true, render the tag and its contents on a single line without indentation
    """

    @staticmethod
    def process_attrs (attrs) :
        """
            Postprocess attributes.

            Key-value pairs where the value is None will be omitted, and any trailing underscores in the key omitted.

            Returns an iterable of (name, value) pairs.

            TODO: only remove one underscore

            >>> dict(Tag.process_attrs(dict()))
            {}
            >>> dict(Tag.process_attrs(dict(foo='bar')))
            {'foo': 'bar'}
            >>> dict(Tag.process_attrs(dict(class_='bar', frob=None)))
            {'class': 'bar'}
        """

        return ((k.rstrip('_'), v) for k, v in attrs.iteritems() if v is not None)

    def __init__ (self, _name, *contents, **attrs) :
        """
            Construct tag with given name/attributes or contents.

            The filtering rules described in process_contents/process_attrs apply.

            >>> Tag('foo')
            Tag('foo')
            >>> Tag('foo', 'quux')
            Tag('foo', 'quux')
            >>> Tag('foo', 'quux', bar=5)
            Tag('foo', 'quux', bar=5)
            >>> Tag('foo', class_='ten')
            Tag('foo', class='ten')
            >>> Tag('bar', name='foo')
            Tag('bar', name='foo')
        """

        # store contents as container
        super(Tag, self).__init__(*contents)

        # store postprocessed stuff
        self.name = _name
        self.attrs = dict(self.process_attrs(attrs))

    def __call__ (self, *contents, **attrs) :
        """
            Return a new Tag with this tag's attributes and contents, as well as the given attributes/contents.

            This tag itself is left unmodified. Given attributes override existing ones of the same name.

            The filtering rules described in process_contents/process_attrs apply.

            >>> Tag('foo')('bar')
            Tag('foo', 'bar')
            >>> Tag('a', href='index.html')("Home")
            Tag('a', 'Home', href='index.html')
            >>> Tag('bar', None)(5, foo=None, class_='bar')
            Tag('bar', 5, class='bar')
            >>> Tag('a')('b')('c')(asdf=5)
            Tag('a', 'b', 'c', asdf=5)
            >>> t1 = Tag('a'); t2 = t1('b'); t1
            Tag('a')
        """

        # merge attrs/contents
        # XXX: new_attrs is not an iterator...
        new_attrs = dict(_itertools.chain(self.attrs.iteritems(), attrs.iteritems()))
        new_contents = _itertools.chain(self.contents, contents)

        # build new tag
        return Tag(self.name, *new_contents, **new_attrs)

    @staticmethod
    def format_attr (name, value) :
        """
            Format a single HTML tag attribute

            The value is converted to unicode and escaped, including double quotes.

            >>> Tag.format_attr('name', 'value')
            u'name="value"'
            >>> Tag.format_attr('this', '<a"b>')
            u'this="&lt;a&quot;b&gt;"'
            >>> Tag.format_attr('xx', 1337)
            u'xx="1337"'
        """

        return u'%s="%s"' % (name, escape(unicode(value), True))

    def render_attrs (self) :
        """
            Return the HTML attributes string

            Attributes whose name starts with an underscore are rendering options, and are left out.

            >>> Tag('x', foo=5, bar='<').render_attrs()
            u'foo="5" bar="&lt;"'
        """

        return u" ".join(self.format_attr(n, v) for n, v in self.attrs.iteritems() if not n.startswith('_'))

    def render_raw_lines (self, indent=u'\t') :
        """
            Render the tag and indented content

            Yields the opening tag, the contents indented by one level, and the closing tag. A tag without contents
            is rendered as a single self-closing tag, unless _selfclosing is False. A _whitespace_sensitive tag is
            rendered as one line with its contents joined together unindented.

            >>> list(Tag('xx', 'yy', zz='foo').render_raw_lines(indent=' '))
            [u'<xx zz="foo">', u' yy', u'</xx>']
        """

        # opts
        selfclosing = self.attrs.get('_selfclosing')
        whitespace_sensitive = self.attrs.get('_whitespace_sensitive')

        # render attr string, including preceding space
        # test the rendered string rather than self.attrs, so that a tag carrying only _option attrs does not get a
        # stray space inside its start tag
        rendered_attrs = self.render_attrs()
        attrs_stuff = (u" " + rendered_attrs) if rendered_attrs else u""

        if self.contents or selfclosing is False :
            if not whitespace_sensitive :
                # wrapping tags
                yield u"<%s%s>" % (self.name, attrs_stuff)

                # subcontents
                for line in super(Tag, self).render_raw_lines(indent=indent) :
                    yield indent + line

                yield u"</%s>" % (self.name, )

            else :
                # whole tag
                yield u"<%s%s>%s</%s>" % (
                    self.name,
                    attrs_stuff,
                    u''.join(super(Tag, self).render_raw_lines(indent=indent)),
                    self.name,
                )

        else :
            # singleton tag
            yield u"<%s%s />" % (self.name, attrs_stuff)

    def __repr__ (self) :
        return 'Tag(%s)' % ', '.join(
            [
                repr(self.name)
            ] + [
                repr(c) for c in self.contents
            ] + [
                '%s=%r' % (name, value) for name, value in self.attrs.iteritems()
            ]
        )

class Text (IRenderable) :
    """
        Raw HTML text

        The given text is output as-is: it is neither escaped nor indented by this class itself.
    """

    def __init__ (self, line) :
        """
            Initialize to render as the given line
        """

        self.lines = [line]

    def render_raw_lines (self, indent=u'\t') :
        """
            Return the stored line(s) unchanged; indent is accepted for interface compatibility but not used.
        """

        return self.lines

class Document (IRenderable) :
    """
        A full XHTML document with XML header, doctype, head and body.

        XXX: current rendering is a bit of a kludge

        <?xml version="..." encoding="..." ?>
        <!DOCTYPE ...>
        <html xmlns="" xml:lang="en">
            <head>...</head>
            <body>...</body>
        </html>
    """

    # NOTE(review): the doctype default carries an empty system identifier and html_xmlns defaults to an empty
    # string; XHTML 1.1 documents normally name the W3C DTD URL and the XHTML namespace here - confirm whether
    # these defaults are intentional.
    def __init__ (self, 
        head, body,
        xml_version='1.0', xml_encoding='utf-8', 
        doctype='html PUBLIC "-//W3C//DTD XHTML 1.1//EN" ""',
        html_xmlns='', html_lang='en'
    ) :
        """
            Build the document from the given parts.

            head, body      - contents for the <head> and <body> tags
            xml_version     - version given in the <?xml ?> header
            xml_encoding    - encoding given in the <?xml ?> header, also used for all encoded output
            doctype         - contents of the <!DOCTYPE ...> declaration
            html_xmlns      - value for the xmlns attribute of <html>
            html_lang       - value for the xml:lang attribute of <html>
        """

        # store
        self.xml_version = xml_version
        self.xml_encoding = xml_encoding
        self.doctype = doctype

        # build the document
        # the attributes go in via a dict, as 'xml:lang' is not a valid keyword argument name
        self.document = Tag('html', **{'xmlns': html_xmlns, 'xml:lang': html_lang})(
            Tag('head', head),
            Tag('body', body),
        )

    def render_raw_lines (self, **render_opts) :
        """
            Render the two header lines, and then the document
        """

        yield '<?xml version="%s" encoding="%s" ?>' % (self.xml_version, self.xml_encoding)
        yield '<!DOCTYPE %s>' % (self.doctype)

        for line in self.document.render_raw_lines(**render_opts) :
            yield line

    def _check_encoding (self, encoding) :
        """
            Raise ValueError if an encoding is given, and it differs from the one declared in the XML header.
        """

        if encoding and encoding != self.xml_encoding :
            raise ValueError("encoding mismatch: %r should be %r" % (encoding, self.xml_encoding))

    def render_str (self, encoding=None, **render_opts) :
        """
            Wrap render_str to verify that the right encoding is used

            The output is always encoded using self.xml_encoding; passing a different encoding raises ValueError.
        """

        self._check_encoding(encoding)

        return super(Document, self).render_str(self.xml_encoding, **render_opts)

    def render_out (self, stream, encoding=None, **render_opts) :
        """
            Wrap render_out to verify that the right encoding is used

            The output is always encoded using self.xml_encoding; passing a different encoding raises ValueError.
        """

        self._check_encoding(encoding)

        return super(Document, self).render_out(stream, self.xml_encoding, **render_opts)

class TagFactory (object) :
    """
        Build Tags with names given as attribute names

        >>> str(TagFactory().raw("><"))
        '><\\n'
    """

    # raw HTML
    raw = Text

    def __getattr__ (self, name) :
        """
            Get a Tag object with the given name, but no contents

            This is only called for names that are not found normally, so .raw is not affected.

            >>> TagFactory().a(href='bar')('quux')
            Tag('a', 'quux', href='bar')
        """

        # special-method lookups (copy, pickle, hasattr() probes, ...) must fail normally instead of getting a Tag
        if name.startswith('__') and name.endswith('__') :
            raise AttributeError(name)

        return Tag(name)

# pretty names
container = Container
tag = Tag
tags = TagFactory()
raw = Text
document = Document

# testing
if __name__ == '__main__' :
    import doctest

    # actually run the doctests embedded in this module's docstrings
    doctest.testmod()
