# author Tero Marttila <>
# Fri, 05 Jun 2009 17:25:58 +0300
# changeset 53 14d73f544764
# parent 51 0f39cb5e4b11
# child 55 77abe8dca695
# permissions -rw-r--r--
# expressive HTML-rendering module with doctests
"""
    Filesystem path handling
"""

import os, os.path

class Node (object) :
    """
        A filesystem object is basically just a complicated representation of a path.

        On the plus side, it has a parent node and can handle unicode/binary paths.

        Every node carries four attributes, all set by __init__:
            parent  - the Node this one lives in
            config  - the configuration object, copied from the parent unless given explicitly
            fsname  - the raw (byte str) name of this node inside its parent
            name    - the decoded, user-readable (unicode) name of this node
    """

    # the binary name
    fsname = None

    # the unicode name
    name = None

    def decode_fsname (self, fsname) :
        """
            Decode the given raw byte string representing a filesystem name into a user-readable unicode name.

            Bytes that are not valid utf-8 are substituted with the unicode replacement character instead of
            raising.

            XXX: currently just hardcoded as utf-8
        """

        return fsname.decode('utf-8', 'replace')

    def encode_name (self, name) :
        """
            Returns a suitable fsname for the given unicode name or strict ASCII str

            XXX: currently just hardcoded as utf-8
        """

        # this should fail for non-ASCII str
        return name.encode('utf-8')

    @classmethod
    def from_node (cls, node) :
        """
            Construct from a Node object, i.e. build a `cls` instance with the same parent, fsname, name and
            config as the given node.
        """

        return cls(node.parent, node.fsname, node.name, node.config)

    def __init__ (self, parent, fsname=None, name=None, config=None) :
        """
            Initialize the node with a parent and both name/fsname.

            If not given, fsname is encoded from name, or name decoded from fsname, using encode/decode_name.

            If parent is given, but both fsname and name are None, then this node will be equal to the parent.

            If config is not given, it is copied from the parent.
        """

        assert not fsname or isinstance(fsname, str)

        if parent and not fsname and not name :
            # no name given -> we're the same as parent
            self.parent, self.config, self.fsname, self.name = (
                parent.parent, parent.config, parent.fsname, parent.name
            )

        else :
            # store
            self.parent = parent

            # config, either as given, or copy from parent
            if config :
                self.config = config

            else :
                self.config = parent.config

            # fsname, either as given, or encoded from the name
            if fsname :
                self.fsname = fsname

            else :
                self.fsname = self.encode_name(name)

            # name, either as given, or decoded from the fsname
            if name :
                self.name = name

            else :
                self.name = self.decode_fsname(fsname)

    def subnode (self, name) :
        """
            Returns a Node object representing the given name behind this node.

            The name should either be a plain ASCII string or unicode object.
        """

        return Node(self, name=name)

    @property
    def path (self) :
        """
            Build and return the real filesystem path for this node
        """

        # build using parent path and our fsname
        return os.path.join(self.parent.path, self.fsname)

    @property
    def unicodepath (self) :
        """
            Build and return the fake unicode filesystem path for this node
        """

        # build using parent unicodepath and our name; joining onto the parent's raw byte path instead would mix
        # byte and unicode strings
        return os.path.join(self.parent.unicodepath, self.name)

    def exists (self) :
        """
            Tests if this node exists on the physical filesystem
        """

        return os.path.exists(self.path)

    def is_dir (self) :
        """
            Tests if this node represents a directory on the filesystem
        """

        return os.path.isdir(self.path)

    def is_file (self) :
        """
            Tests if this node represents a normal file on the filesystem
        """

        return os.path.isfile(self.path)

    def __str__ (self) :
        """
            Returns the raw filesystem path
        """

        return self.path

    def __unicode__ (self) :
        """
            Returns the human-readable path
        """

        return self.unicodepath

    def __repr__ (self) :
        """
            Returns a str representing this dir
        """

        return "Node(%r, %r)" % (self.parent.path, self.fsname)

    def __cmp__ (self, other) :
        """
            Comparisons between Nodes, ordering by parent first and then by unicode name
        """

        return cmp((self.parent, self.name), (other.parent, other.name))

class File (Node) :
    """
        A file. Simple, eh?
    """

    @property
    def basename (self) :
        """
            Returns the "base" part of this file's name, i.e. the filename without the extension
        """

        basename, _ = os.path.splitext(self.name)

        return basename

    @property
    def fileext (self) :
        """
            Returns the file extension part of the file's name, without any leading dot
        """

        _, fileext = os.path.splitext(self.name)

        # splitext keeps the separating dot at the *start* of the extension, so strip from the left
        return fileext.lstrip('.')

    def matchext (self, ext_list) :
        """
            Tests if this file's extension is part of the recognized list of extensions.

            The comparison uses the lowercased extension. Entries in ext_list may be written either with or
            without the leading dot.
        """

        ext = self.fileext.lower()

        # also accept the dotted spelling, so that lists written as ('.jpg', ...) keep matching
        return ext in ext_list or (ext != '' and ('.' + ext) in ext_list)

class Directory (Node) :
    """
        A directory is a node that contains other nodes.
    """

    # a list of (test_func, node_type) tuples for use by children() to build subnodes with
    NODE_TYPES = None

    def subdir (self, name) :
        """
            Returns a Directory object representing the name underneath this dir
        """

        return Directory(self, name=name)

    def subfile (self, name) :
        """
            Returns a File object representing the name underneath this dir
        """

        return File(self, name=name)

    def listdir (self, skip_dotfiles=True) :
        """
            Yield a series of raw fsnames for nodes in this dir

            Names starting with a dot are left out unless skip_dotfiles is false.
        """

        # expressed as a generator expression over os.listdir
        return (fsname for fsname in os.listdir(self.path) if not (skip_dotfiles and fsname.startswith('.')))

    def child_nodes (self, skip_dotfiles=True) :
        """
            Yield a series of nodes contained in this dir

            These are plain Node objects; see children() for typed ones.
        """

        return (Node(self, fsname) for fsname in self.listdir(skip_dotfiles))

    def __iter__ (self) :
        """
            Iterating over a Directory yields sub-Nodes.

            Dotfiles are skipped.
        """

        return self.child_nodes()

    @property
    def root_path (self) :
        """
            Build and return a relative path to the root of this dir tree
        """

        # build using parent root_path
        return os.path.join('..', self.parent.root_path)

    def children (self) :
        """
            Yield a series of Node subclasses representing the items in this dir.

            This uses self.NODE_TYPES to figure out what kind of sub-node object to build. This should be a list of
                (test_func, node_type)

            tuples, of which the first is a function that takes a Node as its sole argument, and returns a boolean.
            For the first test_func which returns True, a Node-subclass object is constructed using node_type.from_node.

            Raises an Exception for a node that none of the test_funcs accept.
        """

        for node in self :
            # figure out what type to use
            for test_func, node_type in self.NODE_TYPES :
                if test_func(node) :
                    # matches, build
                    yield node_type.from_node(node)

                    # first match wins; leaving the loop also skips the else clause below
                    break

            else :
                # unknown file type!
                raise Exception("unrecongized type of file: %s" % node)

# assign default Directory.NODE_TYPES
# this has to happen after the class bodies, as the list refers to Directory itself
Directory.NODE_TYPES = [
    (Node.is_dir,   Directory),
    (Node.is_file,  File),
]

class Root (Directory) :
    """
        A special Directory that overrides the Node methods to anchor the recursion/etc at some 'real' filesystem path.
    """

    def __init__ (self, fspath, config) :
        """
            Construct the directory tree root at the given 'real' path, which must be a raw str
        """

        # abuse Node's concept of a "name" a bit: the whole path is stored as our fsname, and we have no parent
        super(Root, self).__init__(None, fspath, config=config)

    @property
    def path (self) :
        """
            Returns the raw path
        """

        return self.fsname

    @property
    def unicodepath (self) :
        """
            Returns the raw decoded path
        """

        return self.name

    @property
    def root_path (self) :
        """
            Returns an empty string representing this dir
        """

        return ''

    def __repr__ (self) :
        """
            Override Node.__repr__ to not use self.parent.path
        """

        return "Root(%r)" % self.fsname