svv/html.py
changeset 60 b364279347d9
parent 58 4f4150296cd3
child 61 ce1d012d02fe
--- a/svv/html.py	Thu Jan 20 23:21:14 2011 +0200
+++ b/svv/html.py	Fri Jan 21 04:40:36 2011 +0200
@@ -1,276 +1,398 @@
 """
-    Generating XHTML output from nested python objects
-
-    XXX: use a 'real' XML builder?
-
-    To use:
+    Generate XHTML output from python code.
 
     >>> from html import tags
-    >>> str(tags.a(href="http://www.google.com")("Google <this>!"))
-    '<a href="http://www.google.com">\\n\\tGoogle &lt;this&gt;!\\n</a>\\n'
+    >>> unicode(tags.a(href="http://www.google.com")("Google <this>!"))
+    u'<a href="http://www.google.com">\\n\\tGoogle &lt;this&gt;!\\n</a>'
 """
 
-from cgi import escape
-import itertools as _itertools, types as _types
+import itertools as itertools
+import types as types
+from xml.sax import saxutils
 
-class IRenderable (object) :
+class Renderable (object) :
     """
-        Something that's renderable as the contents of a HTML tag.
-
-        This is just used by Container for rendering Tags as actual HTML, vs just plain strs as escaped data.
-
-        Additionally, some str vs unicode vs file stuff..
+        Structured data that's flattened into indented lines of text.
     """
 
-    def render_raw_lines (self, indent=u'\t') :
-        """
-            Render the indented lines for tag and contents, without newlines
+    def flatten (self) :
         """
-
-        abstract
-
-    def render_lines (self, indent=u'\t', newline=u'\n') :
+            Flatten this object into a series of (indent_level, line) tuples.
         """
-            Render full output lines with given newlines
-
-            >>> list(Tag('xx', 'yy').render_lines())
-            [u'<xx>\\n', u'\\tyy\\n', u'</xx>\\n']
-        """
+        
+        raise NotImplementedError()
 
-        for line in self.render_raw_lines(indent=indent) :
-            yield line + newline
-
-    def render_unicode (self, **render_opts) :
+    def iter (self, indent='\t') :
         """
-            Render full tag as a single unicode string
+            Yield a series of lines for this render.
+        """
+        
+        for indent_level, line in self.flatten() :
+            yield (indent * indent_level) + line
 
-            >>> Tag('xx', 'yy').render_unicode()
-            u'<xx>\\n\\tyy\\n</xx>\\n'
+    def unicode (self, newline=u'\n', **opts) :
         """
-
-        return "".join(self.render_lines(**render_opts))
+            Render as a single unicode string.
 
-    def render_str (self, encoding='ascii', **render_opts) :
-        """
-            Render full tag as an encoded string
+            No newline is returned at the end of the string.
 
-            >>> Tag('xx', 'yy').render_str()
-            '<xx>\\n\\tyy\\n</xx>\\n'
+            >>> Tag.build('a', 'b').unicode(newline='X', indent='Y')
+            u'<a>XYbX</a>'
         """
 
-        return self.render_unicode(**render_opts).encode(encoding)
-
-    def render_out (self, stream, encoding=None, **render_opts) :
-        """
-            Render output into the given stream, encoding using the given encoding if given.
-
-            >>> from StringIO import StringIO; buf = StringIO(); Tag('xx', 'yy').render_out(buf, 'ascii'); buf.getvalue()
-            '<xx>\\n\\tyy\\n</xx>\\n'
+        return newline.join(self.iter(**opts))
+    
+    # required for print
+    def str (self, newline='\n', encoding='ascii', **opts) :
         """
-
-        for line in self.render_lines(**render_opts) :
-            if encoding :
-                line = line.encode(encoding)
+            Render as a single string.
+        """
+        
+        # XXX: try and render as non-unicode, i.e. binary data in the tree?
+        return newline.join(line.encode(encoding) for line in self.iter(**opts))
+    
+    # formal interface using defaults
+    __iter__ = iter
+    __unicode__ = unicode
+    __str__ = str
 
-            stream.write(line)
+class Text (Renderable) :
+    """
+        Plain un-structured/un-processed HTML text for output
+
+
+    """
+
+    def __init__ (self, text) :
+        self.text = unicode(text)
     
-    def render_file (self, file, encoding=None, **render_opts) :
+    def flatten (self) :
+        yield (0, self.text)
+
+    def __repr__ (self) :
+        return "Text(%r)" % (self.text, )
+
+class Tag (Renderable) :
+    """
+        An immutable HTML tag structure, with the tag's name, attributes and contents.
+    """
+    
+    # types of nested items to flatten
+    CONTAINER_TYPES = (types.TupleType, types.ListType, types.GeneratorType)
+
+    # types of items to keep as-is
+    ITEM_TYPES = (Renderable, )
+
+    @classmethod
+    def process_contents (cls, *args) :
         """
-            Render output to given file, overwriteing anything already there
+            Yield the HTML tag's contents from the given sequence of positional arguments as a series of flattened
+            items, eagerly converting them to unicode.
+
+            If no arguments are given, we don't have any children:
+            
+            >>> bool(list(Tag.process_contents()))
+            False
+            
+            Items that are None will be ignored:
+
+            >>> list(Tag.process_contents(None))
+            []
+
+            Various Python container types are recursively flattened:
+
+            >>> list(Tag.process_contents([1, 2]))
+            [u'1', u'2']
+            >>> list(Tag.process_contents([1], [2]))
+            [u'1', u'2']
+            >>> list(Tag.process_contents([1, [2]]))
+            [u'1', u'2']
+            >>> list(Tag.process_contents(n + 1 for n in xrange(2)))
+            [u'1', u'2']
+            >>> list(Tag.process_contents((1, 2)))
+            [u'1', u'2']
+            >>> list(Tag.process_contents((1), (2, )))
+            [u'1', u'2']
+
+            Our own HTML-aware objects are returned as-is:
+            
+            >>> list(Tag.process_contents(Tag.build('foo')))
+            [tag('foo')]
+            >>> list(Tag.process_contents(Text('bar')))
+            [Text(u'bar')]
+            
+            All other objects are converted to unicode:
+            
+            >>> list(Tag.process_contents('foo', u'bar', 0.123, False))
+            [u'foo', u'bar', u'0.123', u'False']
+
         """
 
-        self.render_out(file.open_write(encoding), **render_opts)
-
-    # default output
-    __str__ = render_str
-    __unicode__ = render_unicode
-
-    # default .render method
-    render = render_unicode
-
-class Container (IRenderable) :
-    """
-        A container holds a sequence of other renderable items.
-
-        This is just used as the superclass for Tag, and just serves to gives us useful handling for e.g. generators as
-        tag contents (iterate through them instead of repr'ing them).
-    """
-
-    @classmethod
-    def process_contents (cls, contents) :
-        """
-            Postprocess contents iterable to return new list.
-
-            Items that are None will be omitted from the return value.
-
-            Certain core sequence types will be recognized and flattened for output: tuples, lists, and generators.
-
-            >>> list(Container.process_contents([]))
-            []
-            >>> list(Container.process_contents([None]))
-            []
-            >>> list(Container.process_contents([u'foo']))
-            [u'foo']
-        """
-        
-        for content in contents :
-            if content is None :
+        for arg in args :
+            if arg is None :
+                # skip null: None
                 continue
             
-            # Hardcoded list of special-case nested contents
-            elif isinstance(content, (_types.TupleType, _types.ListType, _types.GeneratorType)) :
-                for subcontent in cls.process_contents(content) :
-                    yield subcontent
+            elif isinstance(arg, cls.CONTAINER_TYPES) :
+                # flatten nested container: tuple/list/generator
+                for node in arg :
+                    # recurse
+                    for item in cls.process_contents(node) :
+                        yield item
+
+            elif isinstance(arg, cls.ITEM_TYPES) :
+                # yield item: Renderable
+                yield arg
 
             else :
-                # normal, handle as IRenderable/unicode data
-                yield content
-
-    def __init__ (self, *contents) :
-        """
-            Construct this container with the given sub-items
-        """
-        
-        # store postprocessed
-        self.contents = list(self.process_contents(contents))
-    
-    def render_raw_lines (self, **render_opts) :
-        """
-            Render our contents as a series of non-indented lines, with the contents handling indentation themselves.
+                # as unicode
+                yield unicode(arg)
 
-            >>> list(Container(5).render_raw_lines())
-            [u'5']
-            >>> list(Container('line1', 'line2').render_raw_lines())
-            [u'line1', u'line2']
-            >>> list(Container('a', Tag('b', 'bb'), 'c').render_raw_lines())
-            [u'a', u'<b>', u'\\tbb', u'</b>', u'c']
-            >>> list(Container(Tag('hr'), Tag('foo')('bar')).render_raw_lines())
-            [u'<hr />', u'<foo>', u'\\tbar', u'</foo>']
+    @classmethod
+    def process_attrs (cls, **kwargs) :
         """
-
-        for content in self.contents :
-            if isinstance(content, IRenderable) :
-                # sub-items
-                for line in content.render_raw_lines(**render_opts) :
-                    yield line
+            Yield the HTML tag attributes from the given set of keyword arguments as a series of (name, value) tuples.
             
-            else :
-                # escape raw values
-                yield escape(unicode(content))
-
-    def __repr__ (self) :
-        return 'Container(%s)' % ', '.join(repr(c) for c in self.contents)
-
-class Tag (Container) :
-    """
-        A HTML tag, with attributes and contents, which can a mixture of data and other renderables(tags).
-
-        This is the core object, and the ~only one you really need to pay attention to.
+            Keyword-only options (`_key=value`) are filtered out:
+                
+            >>> dict(Tag.process_attrs(_opt=True))
+            {}
 
-        Provides various kinds of rendering output via IRenderable.
-    """
-
-    @staticmethod
-    def process_attrs (attrs) :
-        """
-            Postprocess attributes.
+            Attributes with a value of None/False are filtered out:
 
-            Key-value pairs where the value is None will be ommitted, and any trailing underscores in the key omitted.
-
-            TODO: only remove one underscore
+            >>> dict(Tag.process_attrs(foo=None, bar=False))
+            {}
+            
+            A value given as True is returned as the key's value:
 
-            >>> dict(Tag.process_attrs(dict()))
-            {}
-            >>> dict(Tag.process_attrs(dict(foo='bar')))
-            {'foo': 'bar'}
-            >>> dict(Tag.process_attrs(dict(class_='bar', frob=None)))
-            {'class': 'bar'}
+            >>> dict(Tag.process_attrs(quux=True))
+            {'quux': u'quux'}
+
+            A (single) trailing underscore in the attribute name is removed:
+
+            >>> dict(Tag.process_attrs(class_='foo'))
+            {'class': u'foo'}
+            >>> dict(Tag.process_attrs(data__='foo'))
+            {'data_': u'foo'}
         """
 
-        return ((k.rstrip('_'), v) for k, v in attrs.iteritems() if v is not None)
-
-    def __init__ (self, _name, *contents, **attrs) :
-        """
-            Construct tag with given name/attributes or contents.
-
-            The filtering rules desribed in process_contents/process_attrs apply.
+        for key, value in kwargs.iteritems() :
+            # keyword arguments are always pure strings
+            assert type(key) is str
 
-            >>> Tag('foo')
-            Tag('foo')
-            >>> Tag('foo', 'quux')
-            Tag('foo', 'quux')
-            >>> Tag('foo', 'quux', bar=5)
-            Tag('foo', 'quux', bar=5)
-            >>> Tag('foo', class_='ten')
-            Tag('foo', class='ten')
-            >>> Tag('bar', name='foo')
-            Tag('bar', name='foo')
+            if value is None or value is False:
+                # omit
+                continue
+            
+            if key.startswith('_') :
+                # option
+                continue
+
+            if key.endswith('_') :
+                # strip underscore
+                key = key[:-1]
+            
+            if value is True :
+                # flag attr
+                value = key
+            
+            yield key, unicode(value)
+        
+    @classmethod
+    def process_opts (cls, **kwargs) :
+        """
+            Return the keyword-only _options, extracted from the given dict of keyword arguments, as a series of
+            (k, v) tuples.
+
+            >>> Tag.process_opts(foo='bar', _bar=False)
+            (('bar', False),)
         """
         
-        # store contents as container
-        super(Tag, self).__init__(*contents)
-        
-        # store postprocessed stuff
-        self.name = _name
-        self.attrs = dict(self.process_attrs(attrs))
-
-    def __call__ (self, *contents, **attrs) :
+        return tuple((k.lstrip('_'), v) for k, v in kwargs.iteritems() if k.startswith('_'))
+    
+    @classmethod
+    def build (cls, _name, *args, **kwargs) :
         """
-            Return a new Tag with this tag's attributes and contents, as well as the given attributes/contents.
+            Factory function for constructing Tags by directly passing in contents/attributes/options as Python function
+            arguments/keyword arguments.
 
-            The filtering rules desribed in process_contents/process_attrs apply.
+            The first positional argument is the tag's name:
+            
+            >>> Tag.build('foo')
+            tag('foo')
+            
+            Further positional arguments are the tag's contents:
 
-            >>> Tag('foo')('bar')
-            Tag('foo', 'bar')
-            >>> Tag('a', href='index.html')("Home")
-            Tag('a', 'Home', href='index.html')
-            >>> Tag('bar', None)(5, foo=None, class_='bar')
-            Tag('bar', 5, class='bar')
-            >>> Tag('a')('b')('c')(asdf=5)
-            Tag('a', 'b', 'c', asdf=5)
-            >>> t1 = Tag('a'); t2 = t1('b'); t1
-            Tag('a')
+            >>> Tag.build('foo', 'quux', 'bar')
+            tag('foo', u'quux', u'bar')
+
+            All the rules used by process_contents() are available:
+            
+            >>> Tag.build('foo', [1, None], None, (n for n in xrange(2)))
+            tag('foo', u'1', u'0', u'1')
+
+            The special-case for a genexp as the only argument works:
+            
+            >>> f = lambda *args: Tag.build('foo', *args)
+            >>> f('hi' for n in xrange(2))
+            tag('foo', u'hi', u'hi')
+            
+            Attributes are passed as keyword arguments, with or without contents:
+
+            >>> Tag.build('foo', id=1)
+            tag('foo', id=u'1')
+            >>> Tag.build('foo', 'quux', bar=5)
+            tag('foo', u'quux', bar=u'5')
+            >>> Tag.build('foo', class_='ten')
+            tag('foo', class=u'ten')
+            
+            The attribute names don't conflict with positional argument names:
+
+            >>> Tag.build('bar', name='foo')
+            tag('bar', name=u'foo')
+
+            Options are handled as the 'real' keyword arguments:
+
+            >>> print Tag.build('foo', _selfclosing=False)
+            <foo></foo>
+            >>> print Tag.build('foo', _foo='bar')
+            Traceback (most recent call last):
+                ...
+            TypeError: __init__() got an unexpected keyword argument 'foo'
         """
 
-        # merge attrs/contents
-        # XXX: new_attrs is not an iterator...
-        new_attrs = dict(_itertools.chain(self.attrs.iteritems(), attrs.iteritems()))
-        new_contents = _itertools.chain(self.contents, contents)
-        
-        # build new tag
-        return Tag(self.name, *new_contents, **new_attrs)
+        # pre-process incoming user values
+        contents = list(cls.process_contents(*args))
+        attrs = dict(cls.process_attrs(**kwargs))
 
-    @staticmethod
-    def format_attr (name, value) :
-        """
-            Format a single HTML tag attribute
+        # XXX: use Python 2.6 keyword-only arguments instead?
+        options = dict(cls.process_opts(**kwargs))
 
-            >>> Tag.format_attr('name', 'value')
-            u'name="value"'
-            >>> Tag.format_attr('this', '<a"b>')
-            u'this="&lt;a&quot;b&gt;"'
-            >>> Tag.format_attr('xx', 1337)
-            u'xx="1337"'
+        return cls(_name, contents, attrs, **options)
+
+    def __init__ (self, name, contents, attrs, selfclosing=None, whitespace_sensitive=None) :
+        """
+            Initialize internal Tag state with the given tag identifier, flattened list of content items, dict of
+            attributes and dict of options.
+
+                selfclosing             - set to False to render empty tags as <foo></foo> instead of <foo /> 
+                                          (for XHTML -> HTML compatibility)
+
+                whitespace_sensitive    - do not indent tag content onto separate rows, render the full tag as a single
+                                          row
+
+            Use the build() factory function to build Tag objects using Python's function call argument semantics.
+            
+            The tag name is used as a pure string identifier:
+
+            >>> Tag(u'foo', [], {})
+            tag('foo')
+            >>> Tag(u'\\xE4', [], {})
+            Traceback (most recent call last):
+                ...
+            UnicodeEncodeError: 'ascii' codec can't encode character u'\\xe4' in position 0: ordinal not in range(128)
+
+            Contents have their order preserved:
+
+            >>> Tag('foo', [1, 2], {})
+            tag('foo', 1, 2)
+            >>> Tag('foo', [2, 1], {})
+            tag('foo', 2, 1)
+
+            Attributes can be given:
+            
+            >>> Tag('foo', [], dict(foo='bar'))
+            tag('foo', foo='bar')
+
+            Options can be given:
+
+            >>> print Tag('foo', [], {}, selfclosing=False)
+            <foo></foo>
+        """
+        
+        self.name = str(name)
+        self.contents = contents
+        self.attrs = attrs
+
+        # options
+        self.selfclosing = selfclosing
+        self.whitespace_sensitive = whitespace_sensitive
+
+    def __call__ (self, *args, **kwargs) :
+        """
+            Return a new Tag as a copy of this tag, but with the given additional attributes/contents.
+
+            The same rules for function positional/keyword arguments apply as for build()
+
+            >>> Tag.build('foo')('bar')
+            tag('foo', u'bar')
+            >>> Tag.build('a', href='index.html')("Home")
+            tag('a', u'Home', href=u'index.html')
+
+            New contents and attributes can be given freely, using the same rules as for Tag.build:
+
+            >>> Tag.build('bar', None)(5, foo=None, class_='bar')
+            tag('bar', u'5', class=u'bar')
+
+            Tag contents accumulate in order:
+
+            >>> Tag.build('a')('b', ['c'])('d')
+            tag('a', u'b', u'c', u'd')
+
+            Each Tag is immutable, so the called Tag isn't changed, but rather a copy is returned:
+
+            >>> t1 = Tag.build('a'); t2 = t1('b'); t1
+            tag('a')
+
+            Attribute values are replaced:
+
+            >>> Tag.build('foo', a=2)(a=3)
+            tag('foo', a=u'3')
+
+            Options are also supported:
+
+            >>> list(Tag.build('foo')(bar='quux', _selfclosing=False))
+            [u'<foo bar="quux"></foo>']
         """
 
-        return u'%s="%s"' % (name, escape(unicode(value), True))
- 
+        # accumulate contents
+        contents = self.contents + list(self.process_contents(*args))
+
+        # merge attrs
+        attrs = dict(self.attrs)
+        attrs.update(self.process_attrs(**kwargs))
+
+        # options
+        opts = dict(
+            selfclosing = self.selfclosing,
+            whitespace_sensitive = self.whitespace_sensitive,
+        )
+        opts.update(self.process_opts(**kwargs))
+
+        # build updated tag
+        return Tag(self.name, contents, attrs, **opts)
+
     def render_attrs (self) :
         """
             Return the HTML attributes string
 
-            >>> Tag('x', foo=5, bar='<').render_attrs()
+            >>> Tag.build('x', foo=5, bar='<', quux=None).render_attrs()
             u'foo="5" bar="&lt;"'
+            >>> Tag.build('x', foo='a"b').render_attrs()
+            u'foo=\\'a"b\\''
         """
 
-        return " ".join(self.format_attr(n, v) for n, v in self.attrs.iteritems() if not n.startswith('_'))
+        return " ".join(
+            (
+                u'%s=%s' % (name, saxutils.quoteattr(value))
+            ) for name, value in self.attrs.iteritems()
+        )
+
 
     def render_raw_lines (self, indent=u'\t') :
         """
             Render the tag and indented content
 
-            >>> list(Tag('xx', 'yy', zz='foo').render_raw_lines(indent=' '))
-            [u'<xx zz="foo">', u' yy', u'</xx>']
         """
         
         # opts
@@ -298,10 +420,125 @@
         else :
             # singleton tag
             yield u"<%s%s />" % (self.name, attrs_stuff)
-    
+ 
+    def flatten_items (self, indent=1) :
+        """
+            Flatten our content into a series of indented lines.
+
+            >>> list(Tag.build('tag', 5).flatten_items())
+            [(1, u'5')]
+            >>> list(Tag.build('tag', 'line1', 'line2').flatten_items())
+            [(1, u'line1'), (1, u'line2')]
+
+            Nested :
+            >>> list(Tag.build('tag', 'a', Tag.build('b', 'bb'), 'c').flatten_items())
+            [(1, u'a'), (1, u'<b>'), (2, u'bb'), (1, u'</b>'), (1, u'c')]
+            >>> list(Tag.build('tag', Tag.build('hr'), Tag.build('foo')('bar')).flatten_items())
+            [(1, u'<hr />'), (1, u'<foo>'), (2, u'bar'), (1, u'</foo>')]
+        """
+
+        for item in self.contents :
+            if isinstance(item, Renderable) :
+                # recursively flatten items
+                for line_indent, line in item.flatten() :
+                    # indented
+                    yield indent + line_indent, line
+
+            else :
+                # render HTML-escaped raw value
+                # escape raw values
+                yield indent, saxutils.escape(item)
+   
+    def flatten (self) :
+        """
+            Render the tag and all content as a flattened series of indented lines.
+            
+            Empty tags collapse per default:
+
+            >>> list(Tag.build('foo').flatten())
+            [(0, u'<foo />')]
+            >>> list(Tag.build('bar', id=5).flatten())
+            [(0, u'<bar id="5" />')]
+
+            Values are indented inside the start tag:
+
+            >>> list(Tag.build('foo', 'bar', a=5).flatten())
+            [(0, u'<foo a="5">'), (1, u'bar'), (0, u'</foo>')]
+            
+            Nested tags are further indented:
+
+            >>> list(Tag.build('1', '1.1', Tag.build('1.2', '1.2.1'), '1.3', a=5).flatten())
+            [(0, u'<1 a="5">'), (1, u'1.1'), (1, u'<1.2>'), (2, u'1.2.1'), (1, u'</1.2>'), (1, u'1.3'), (0, u'</1>')]
+
+            Empty tags are rendered with a separate closing tag on the same line, if desired:
+
+            >>> list(Tag.build('foo', _selfclosing=False).flatten())
+            [(0, u'<foo></foo>')]
+            >>> list(Tag.build('foo', src='asdf', _selfclosing=False).flatten())
+            [(0, u'<foo src="asdf"></foo>')]
+
+            Tags that are declared as whitespace-sensitive are collapsed onto the same line:
+
+            >>> list(Tag.build('foo', _whitespace_sensitive=True).flatten())
+            [(0, u'<foo />')]
+            >>> list(Tag.build('foo', _whitespace_sensitive=True, _selfclosing=False).flatten())
+            [(0, u'<foo></foo>')]
+            >>> list(Tag.build('foo', 'bar', _whitespace_sensitive=True).flatten())
+            [(0, u'<foo>bar</foo>')]
+            >>> list(Tag.build('foo', 'bar\\nasdf\\tx', _whitespace_sensitive=True).flatten())
+            [(0, u'<foo>bar\\nasdf\\tx</foo>')]
+            >>> list(Tag.build('foo', 'bar', Tag.build('quux', 'asdf'), 'asdf', _whitespace_sensitive=True).flatten())
+            [(0, u'<foo>bar<quux>asdf</quux>asdf</foo>')]
+
+            Embedded HTML given as string values is escaped:
+
+            >>> list(Tag.build('foo', '<asdf>'))
+            [u'<foo>', u'\\t&lt;asdf&gt;', u'</foo>']
+
+            Embedded quotes in attribute values are escaped:
+
+            >>> list(Tag.build('foo', style='ok;" onload="...'))
+            [u'<foo style=\\'ok;" onload="...\\' />']
+            >>> list(Tag.build('foo', style='ok;\\'" onload=..."\\''))
+            [u'<foo style="ok;\\'&quot; onload=...&quot;\\'" />']
+        """
+
+        # optional attr spec
+        if self.attrs :
+            attrs = " " + self.render_attrs()
+
+        else :
+            attrs = ""
+
+        if not self.contents and self.selfclosing is False :
+            # empty tag, but don't use the self-closing syntax..
+            yield 0, u"<%s%s></%s>" % (self.name, attrs, self.name)
+
+        elif not self.contents  :
+            # self-closing xml tag
+            # do note that this is invalid HTML, and the space before the / is relevant for parsing it as HTML
+            yield 0, u"<%s%s />" % (self.name, attrs)
+
+        elif self.whitespace_sensitive :
+            # join together each line for each child, discarding the indent
+            content = u''.join(line for indent, line in self.flatten_items())
+
+            # render full tag on a single line
+            yield 0, u"<%s%s>%s</%s>" % (self.name, attrs, content, self.name)
+
+        else :
+            # start tag
+            yield 0, u"<%s%s>" % (self.name, attrs)
+
+            # contents, indented one level below the start tag
+            for indent, line in self.flatten_items(indent=1) :
+                yield indent, line
+
+            # close tag
+            yield 0, u"</%s>" % (self.name, )
 
     def __repr__ (self) :
-        return 'Tag(%s)' % ', '.join(
+        return 'tag(%s)' % ', '.join(
             [
                 repr(self.name)
             ] + [
@@ -311,96 +548,60 @@
             ]
         )
 
-class Text (IRenderable) :
-    """
-        Raw HTML text
-    """
-
-    def __init__ (self, line) :
-        """
-            Initialize to render as the given lines
-        """
-
-        self.lines = [line]
-    
-    def render_raw_lines (self, indent=u'\t') :
-        return self.lines
+# factory function for Tag
+tag = Tag.build
 
-class Document (IRenderable) :
-    """
-        A full XHTML document with XML header, doctype, head and body.
-
-        XXX: current rendering is a bit of a kludge
 
-        <?xml version="..." encoding="..." ?>
-        <!DOCTYPE ...>
-        
-        <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
-            <head>
-                ...
-            </head>
-            <body>
-                ...
-            </body>
-        </html>
+class Document (Renderable) :
+    """
+        A full XHTML document (XHTML 1.1 doctype by default) with optional XML header, doctype, html[@xmlns].
+
+        >>> list(Document(tags.html('...')))
+        [u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">', u'<html xmlns="http://www.w3.org/1999/xhtml">', u'\\t...', u'</html>']
     """
 
-    def __init__ (self, 
-        head, body,
-        xml_version='1.0', xml_encoding='utf-8', 
+    def __init__ (self, root,
         doctype='html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"',
-        html_xmlns='http://www.w3.org/1999/xhtml', html_lang='en'
+        html_xmlns='http://www.w3.org/1999/xhtml',
+        xml_version=None, xml_encoding=None, 
     ) :
-        # store
-        self.xml_version = xml_version
-        self.xml_encoding = xml_encoding
-        self.doctype = doctype
-        
-        # build the document
-        self.document = Tag('html', **{'xmlns': html_xmlns, 'xml:lang': html_lang})(
-            Tag('head', head),
-            Tag('body', body),
-        )
-
-    def render_raw_lines (self, **render_opts) :
-        """
-            Render the two header lines, and then the document
-        """
+        # add xmlns attr to root node
+        self.root = root(xmlns=html_xmlns)
 
-        yield '<?xml version="%s" encoding="%s" ?>' % (self.xml_version, self.xml_encoding)
-        yield '<!DOCTYPE %s>' % (self.doctype)
+        # store
+        self.doctype = doctype
+        self.xml_declaration = {}
 
-        for line in self.document.render_raw_lines(**render_opts) :
-            yield line
-    
-    def _check_encoding (self, encoding) :
-        if encoding and encoding != self.xml_encoding :
-            raise ValueError("encoding mismatch: %r should be %r" % (encoding, self.xml_encoding))
+        if xml_version :
+            self.xml_declaration['version'] = xml_version
 
-    def render_str (self, encoding=None, **render_opts) :
+        if xml_encoding :
+            self.xml_declaration['encoding'] = xml_encoding
+
+    def flatten (self) :
         """
-            Wrap render_str to verify that the right encoding is used
+            Return the header lines along with the normally formatted <html> tag
         """
-
-        self._check_encoding(encoding)
         
-        return super(XHTMLDocument, self).render_str(self.xml_encoding, **render_opts)
+        if self.xml_declaration :
+            yield 0, u'<?xml %s ?>' % (' '.join('%s="%s"' % kv for kv in self.xml_declaration.iteritems()))
 
-    def render_out (self, stream, encoding=None, **render_opts) :
-        """
-            Wrap render_out to verify that the right encoding is used
-        """
+        if self.doctype :
+            yield 0, u'<!DOCTYPE %s>' % (self.doctype)
 
-        self._check_encoding(encoding)
-        
-        return super(XHTMLDocument, self).render_out(stream, self.xml_encoding, **render_opts)
+        # <html>
+        for indent, line in self.root.flatten() :
+            yield indent, line
 
 class TagFactory (object) :
     """
         Build Tags with names give as attribute names
+        
+        >>> list(TagFactory().a(href='#')('Yay'))
+        [u'<a href="#">', u'\\tYay', u'</a>']
 
-        >>> str(TagFactory().raw("><")
-        '><'
+        >>> list(TagFactory().raw("><"))
+        [u'><']
     """
 
     # full XHTML document
@@ -413,18 +614,14 @@
         """
             Get a Tag object with the given name, but no contents
 
-            >>> TagFactory().a(href='bar')('quux')
-            Tag('a', 'quux', href='bar')
+            >>> TagFactory().a
+            tag('a')
         """
 
-        return Tag(name)
+        return Tag(name, [], {})
 
-# pretty names
-container = Container
-tag = Tag
+# static instance
 tags = TagFactory()
-raw = Text
-document = Document
 
 # testing
 if __name__ == '__main__' :