degal/filesystem.py
author Tero Marttila <terom@fixme.fi>
Fri, 05 Jun 2009 23:42:03 +0300
changeset 76 e22d9f699081
parent 68 49388e9fd5fa
child 77 2a53c5ade434
permissions -rw-r--r--
misc. fixes
"""
    Filesystem path handling
"""

import os, os.path, errno
import codecs, shutil
import itertools

from utils import lazy_load

class Node(object):
    """
        A filesystem node: one path component plus a reference to its parent node.

        Every node carries both the raw byte-string name used to talk to the filesystem (`fsname`) and the
        unicode name used for display (`name`). Full paths are built recursively through `parent`, so the
        chain must end at a node that overrides the path-building methods (see Root).
    """

    # the binary (filesystem-encoded) name
    fsname = None

    # the unicode (human-readable) name
    name = None

    # the configuration object; stays None unless __init__ (or a subclass) assigns one, so that reading
    # `node.config` on a parentless, config-less node does not raise AttributeError
    config = None

    def decode_fsname(self, fsname):
        """
            Decode the given raw byte string representing a filesystem name into an user-readable unicode name.

            Undecodable bytes are replaced rather than raising.

            XXX: currently just hardcoded as utf-8
        """

        return fsname.decode('utf-8', 'replace')

    def encode_name(self, name):
        """
            Returns a suitable fsname for the given unicode name or strict ASCII str

            XXX: currently just hardcoded as utf-8
        """

        # this should fail for non-ASCII str
        return name.encode('utf-8')

    @classmethod
    def from_node(cls, node):
        """
            Construct an instance of `cls` from an existing Node object, copying its parent, names and config.
        """

        return cls(node.parent, node.fsname, node.name, node.config)

    def __init__(self, parent, fsname=None, name=None, config=None):
        """
            Initialize the node with a parent and both name/fsname.

            If not given, fsname is encoded from name, or name decoded from fsname, using
            encode_name/decode_fsname.

            If parent is given, but both fsname and name are empty, then this node will be equal to the parent.

            The config is taken from the `config` argument if given, otherwise copied from the parent.
        """

        assert not fsname or isinstance(fsname, str)

        if parent and not fsname and not name:
            # no name given -> we're the same as parent
            self.parent = parent.parent
            self.config = parent.config
            self.fsname = parent.fsname
            self.name = parent.name
            return

        self.parent = parent

        # config, either as given, or copied from parent; with neither, the class-level default applies
        if config:
            self.config = config

        elif parent:
            self.config = parent.config

        # each name falls back to being derived from the other one
        self.fsname = fsname if fsname else self.encode_name(name)
        self.name = name if name else self.decode_fsname(fsname)

    def subnode(self, name):
        """
            Returns a Node object representing the given name behind this node.

            The name should either be a plain ASCII string or unicode object.
        """

        return Node(self, name=name)

    def nodepath(self):
        """
            Returns the list of nodes leading from the root node down to this node, inclusive at both ends.
        """

        return self.parent.nodepath() + [self]

    # NOTE(review): lazy_load comes from utils and is not visible here; presumably it exposes the method as a
    # cached attribute, since the rest of this class reads `self.path` without calling it -- confirm.
    @lazy_load
    def path(self):
        """
            Return the machine-readable root-path for this node
        """

        # build using parent path and our fsname
        return os.path.join(self.parent.path, self.fsname)

    @lazy_load
    def unicodepath(self):
        """
            Return the human-readable root-path for this node
        """

        # build using parent unicodepath (not the raw byte path) and our name
        return os.path.join(self.parent.unicodepath, self.name)

    def path_segments(self, unicode=True):
        """
            Return a list of single-level names describing the path from the root to this node.

            Yields unicode names if `unicode` is true, raw fsnames otherwise.
        """

        own_segment = self.name if unicode else self.fsname

        return self.parent.path_segments(unicode=unicode) + [own_segment]

    def exists(self):
        """
            Tests if this node exists on the physical filesystem
        """

        return os.path.exists(self.path)

    def is_dir(self):
        """
            Tests if this node represents a directory on the filesystem
        """

        return os.path.isdir(self.path)

    def is_file(self):
        """
            Tests if this node represents a normal file on the filesystem
        """

        return os.path.isfile(self.path)

    def test(self):
        """
            Tests that this node exists. Raises an error if not, otherwise returns the node itself
        """

        if not self.exists():
            raise Exception("Filesystem node does not exist: %s" % self)

        return self

    def path_from(self, node):
        """
            Returns a relative path to this node from the given node.

            This is the same as path_to, but just reversed.
        """

        return node.path_to(self)

    def path_to(self, node):
        """
            Returns a relative Path from this node to the given node.

            The path climbs from this node up to the deepest common ancestor and then descends to `node`.

            Raises ValueError if the two nodes do not share any ancestor.
        """

        # root-first node lists for both ends
        from_path = self.nodepath()
        to_path = node.nodepath()

        # length of the common prefix; bounded so that one path being a prefix of the other (or both being
        # equal) does not run off the end of the shorter list
        common = 0
        limit = min(len(from_path), len(to_path))

        while common < limit and from_path[common] == to_path[common]:
            common += 1

        if not common:
            raise ValueError("No common ancestor between %r and %r" % (self, node))

        # the deepest shared node is where the path turns from going up to going down
        pivot = to_path[common - 1]

        return Path(*itertools.chain(reversed(from_path[common:]), [pivot], to_path[common:]))

    def stat(self, soft=False):
        """
            Returns the os.stat struct for this node.

            If `soft` is given, returns None if this node doesn't exist; any other OSError is re-raised.
        """

        try:
            return os.stat(self.path)

        except OSError as e:
            # trap ENOENT for soft
            if soft and e.errno == errno.ENOENT:
                return None

            raise

    # alias str/unicode
    __str__ = path
    __unicode__ = unicodepath

    def __repr__(self):
        """
            Returns a str representing this node
        """

        return "Node(%r, %r)" % (self.parent.path, self.fsname)

    def __cmp__(self, other):
        """
            Comparisons between Nodes, ordered by (parent, name).

            NOTE(review): the other node's parent is only consulted when this node has a parent, so a
            parentless node is compared by name alone; comparing against a shallower node or None can fail
            on the missing attributes -- confirm whether that asymmetry is intended.
        """

        return cmp((self.parent, self.name), (other.parent if self.parent else None, other.name))

class Path(object):
    """
        A Path is a sequence of Nodes that form a path through the node tree.

        Each node must either be the parent or the child of the following node. The first node is only the
        starting point and does not itself contribute a path segment.
    """

    def __init__(self, *nodes):
        """
            Initialize with the given node path
        """

        self.nodes = nodes

    def subpath(self, *nodes):
        """
            Returns a new path with the given node(s) appended
        """

        return Path(*(tuple(self.nodes) + nodes))

    def path_segments(self, unicode=True):
        """
            Yields a series of physical path segments for this path.

            A step from a node to its parent yields '..', any other step yields the name of the node stepped
            to (the unicode name if `unicode` is true, the raw fsname otherwise).
        """

        prev = None

        for node in self.nodes:
            if prev is None:
                # the starting node produces no segment
                pass

            elif prev.parent and prev.parent == node:
                # up a level
                yield u'..' if unicode else '..'

            else:
                # down a level
                yield node.name if unicode else node.fsname

            # chained together
            prev = node

    def __iter__(self):
        """
            Iterate over the nodes
        """

        return iter(self.nodes)

    def __unicode__(self):
        """
            Returns the unicode human-readable path.

            A path without any segments (e.g. from a node to itself) is rendered as '.'.
        """

        segments = list(self.path_segments(unicode=True))

        # os.path.join() refuses to be called without arguments
        if not segments:
            return u'.'

        return os.path.join(*segments)

    def __str__(self):
        """
            Returns the binary machine-readable path.

            A path without any segments (e.g. from a node to itself) is rendered as '.'.
        """

        segments = list(self.path_segments(unicode=False))

        # os.path.join() refuses to be called without arguments
        if not segments:
            return '.'

        return os.path.join(*segments)

    def __repr__(self):
        """
            Returns a str listing the raw path segments
        """

        return "Path(%s)" % ', '.join(repr(segment) for segment in self.path_segments(unicode=False))

class File(Node):
    """
        A Node that represents a regular file, adding name-splitting helpers and open/copy operations.
    """

    @property
    def basename(self):
        """
            Returns the "base" part of this file's name, i.e. the filename without the extension
        """

        return os.path.splitext(self.name)[0]

    @property
    def fileext(self):
        """
            Returns the file extension part of the file's name, without any leading dot
        """

        # splitext keeps the separator at the *front* of the extension ('.jpg'), so strip from the left
        return os.path.splitext(self.name)[1].lstrip('.')

    def matchext(self, ext_list):
        """
            Tests if this file's lowercased extension (without the dot) is part of the given list of extensions
        """

        return self.fileext.lower() in ext_list

    def test(self):
        """
            Tests that this file exists as a file. Raises an error if not, otherwise returns itself
        """

        if not self.is_file():
            raise Exception("File does not exist: %s" % self)

        return self

    def open(self, mode='r', encoding=None, errors=None, bufsize=None):
        """
            Wrapper for open/codecs.open.

            codecs.open is used if an `encoding` is given, the builtin open otherwise. `errors` and `bufsize`
            are only passed on when given, so that the underlying function's own defaults apply otherwise.

            Raises an error if read_only mode is set and mode contains any of 'wa+'
        """

        if self.config.read_only and any((c in mode) for c in 'wa+'):
            raise Exception("Unable to open file for %s due to read_only mode: %s" % (mode, self))

        if encoding:
            # passing None for errors/buffering is not the same as omitting them (buffering must be an int)
            options = {}

            if errors is not None:
                options['errors'] = errors

            if bufsize is not None:
                options['buffering'] = bufsize

            return codecs.open(self.path, mode, encoding, **options)

        if bufsize is None:
            return open(self.path, mode)

        return open(self.path, mode, bufsize)

    def open_write(self, encoding=None, errors=None, bufsize=None):
        """
            Open for write using open('w').
        """

        return self.open('w', encoding, errors, bufsize)

    def copy_from(self, file):
        """
            Replace this file with a copy of the given file with default permissions.

            Raises an error if read_only mode is set.

            XXX: accept mode
        """

        if self.config.read_only:
            raise Exception("Not copying file as read_only mode is set: %s -> %s" % (file, self))

        # perform the copy (contents only)
        shutil.copyfile(file.path, self.path)

class Directory(Node):
    """
        A directory is a node that contains other nodes.
    """

    # a list of (test_func, node_type) tuples for use by children() to build subnodes with
    NODE_TYPES = None

    def subdir(self, name, create=False):
        """
            Returns a Directory object representing the name underneath this dir.

            If the create option is given, the directory will be created if it does not exist. Note that this will
            raise an error if read_only mode is set
        """

        subdir = Directory(self, name=name)

        if create and not subdir.is_dir():
            # create it!
            subdir.mkdir()

        return subdir

    def test_subdir(self, name, create=False):
        """
            Test for the presence of a subdir with the given name, optionally creating it if missing.

            Returns a (exists, created, dir) tuple, where `exists` tells whether the directory is present
            once the call returns, and `created` whether this call created it.

            XXX: ugly, not used
        """

        subdir = Directory(self, name=name)

        if subdir.is_dir():
            # already existing
            return True, False, subdir

        if create:
            # didn't exist, did create
            subdir.mkdir()

            return True, True, subdir

        # doesn't exist, don't create
        return False, False, subdir

    def subfile(self, name):
        """
            Returns a File object representing the name underneath this dir
        """

        return File(self, name=name)

    def test(self):
        """
            Tests that this dir exists as a dir. Raises an error if not, otherwise returns itself
        """

        if not self.is_dir():
            raise Exception("Directory does not exist: %s" % self)

        return self

    def mkdir(self):
        """
            Create this directory with default permissions.

            This will fail if read_only mode is set

            XXX: mode argument
        """

        if self.config.read_only:
            # forbidden
            raise Exception("Unable to create dir due to read_only mode: %s" % self)

        # do it
        os.mkdir(self.path)

    def listdir(self, skip_dotfiles=True):
        """
            Yield a series of raw fsnames for nodes in this dir.

            If skip_dotfiles is given, names that begin with a . are omitted.
        """

        return (
            fsname for fsname in os.listdir(self.path)
            if not (skip_dotfiles and fsname.startswith('.'))
        )

    def subnodes(self, skip_dotfiles=True, sort=True):
        """
            Return the Nodes contained in this dir.

            If skip_dotfiles is given, nodes that begin with a . are omitted.

            If `sort` is given, the nodes are returned as a sorted list; otherwise as a lazy generator.
        """

        nodes = (Node(self, fsname) for fsname in self.listdir(skip_dotfiles))

        if sort:
            return sorted(nodes)

        return nodes

    def __iter__(self):
        """
            Iterating over a Directory yields sub-Nodes.

            Dotfiles are skipped.
        """

        # subnodes() hands back a list by default, but __iter__ must return a real iterator
        return iter(self.subnodes())

    @property
    def root_path(self):
        """
            Build and return a relative path to the root of this dir tree

            XXX: move to node
        """

        # build using parent root_path
        return os.path.join('..', self.parent.root_path)

    def children(self):
        """
            Yield a series of Node subclasses representing the items in this dir.

            This uses self.NODE_TYPES to figure out what kind of sub-node object to build. This should be a list of
                (test_func, node_type)

            tuples, of which the first is a function that takes a Node as its sole argument, and returns a boolean.
            For the first test_func which returns True, a Node-subclass object is constructed using node_type.from_node.

            Raises an error for a node that no test_func accepts.

            XXX: never used
        """

        for node in self:
            # figure out what type to use
            for test_func, node_type in self.NODE_TYPES:
                if test_func(node):
                    # matches, build
                    yield node_type.from_node(node)

                    # first match wins; leaving the loop also skips the else clause below
                    break

            else:
                # unknown file type!
                raise Exception("unrecongized type of file: %s" % node)

# Default node-type table for Directory; it can only be filled in here, once both Directory and File exist.
# Entries are (test_func, node_type) pairs, listed in the order they are tried.
Directory.NODE_TYPES = [(Node.is_dir, Directory), (Node.is_file, File)]


class Root(Directory):
    """
        The top of a node tree: a Directory without a parent, tied to an actual filesystem path.

        All of the Node methods that normally recurse into the parent are overridden here so that the
        recursion terminates.
    """

    # XXX: config needs a default
    def __init__(self, fspath, config=None):
        """
            Build the tree root for the given 'real' filesystem path, which must be a raw str.

            The given config is stored on the root as-is, even if it is None.
        """

        # there is no parent, and the full path doubles as this node's "name"
        super(Root, self).__init__(parent=None, fsname=fspath)

        # always assign, since Node.__init__ only stores a config that tests true
        self.config = config

    def nodepath(self):
        """
            The node path of the root consists of just the root itself
        """

        return [self]

    @property
    def path(self):
        """
            The raw filesystem path, exactly as given to the constructor
        """

        return self.fsname

    @property
    def unicodepath(self):
        """
            The decoded form of the raw filesystem path
        """

        return self.name

    @property
    def root_path(self):
        """
            The relative path from the root to itself, i.e. an empty string
        """

        return ''

    def path_segments(self, unicode=True):
        """
            The root contributes no path segments, whatever the value of `unicode`
        """

        return []

    def __repr__(self):
        """
            Replaces Node.__repr__, which would need self.parent.path
        """

        return "Root(%r)" % self.fsname