"""
Filesystem path handling
"""

import codecs
import errno
import itertools
import os
import os.path
import shutil
import stat

from utils import lazy_load


class NodeError(Exception):
    """
    General exception class for errors associated with some specific node
    """

    def __init__(self, node, message):
        super(NodeError, self).__init__(message)

        self.node = node

        # stored explicitly so that __str__/__repr__ do not depend on the
        # deprecated BaseException.message attribute
        self.message = message

    def __str__(self):
        return "%s: %s" % (self.node, self.message)

    def __unicode__(self):
        return u"%s: %s" % (self.node, self.message)

    def __repr__(self):
        return "NodeError(%r, %r)" % (self.node, self.message)


class NodeErrno(NodeError):
    """
    OSError/errno errors for nodes
    """

    def __init__(self, node, errno):
        """
        Converts the given errno into an error message and uses that as the exception message.

        The numeric code itself is kept as `self.errno`.
        """

        # NOTE: the `errno` parameter shadows the errno module inside this method
        super(NodeErrno, self).__init__(node, os.strerror(errno))

        self.errno = errno


class Node(object):
    """
    A filesystem object is basically just complicated representation of a path.

    On the plus side, it has a parent node and can handle unicode/binary paths.
    """

    # the binary name
    fsname = None

    # the unicode name
    name = None

    def decode_fsname(self, fsname):
        """
        Decode the given raw byte string representing a filesystem name into an user-readable unicode name.

        XXX: currently just hardcoded as utf-8

        >>> Node(None, 'foo').decode_fsname('\xa5\xa6')
        u'\\ufffd\\ufffd'
        """

        return fsname.decode('utf-8', 'replace')

    def encode_name(self, name):
        """
        Returns a suitable fsname for the given unicode name or strict ASCII str

        XXX: currently just hardcoded as utf-8

        >>> Node(None, 'foo').encode_name(u'ab')
        'ab'
        """

        # this should fail for non-ASCII str
        return name.encode('utf-8')

    def __init__(self, parent, fsname=None, name=None, config=None):
        """
        Initialize the node with a parent and both name/fsname.

        If not given, fsname is encoded from name, or name decoded from fsname, using encode/decode_name.

        If parent is given, but both fsname and name are None, then this node will be cloned from the parent.

        >>> Node(Root('/'), 'foo')
        Node('/', 'foo')
        >>> Node(Node(Root('/'), 'bar'))
        Node('/', 'bar')
        >>> Node(None, fsname='foo\xa5').name
        u'foo\\ufffd'
        >>> Node(None, name=u'foo').fsname
        'foo'
        """

        # fsname must not be an unicode string
        assert not fsname or isinstance(fsname, str)

        if parent and not fsname and not name:
            # no name given -> we're the same as parent
            self.parent = parent.parent
            self.config = parent.config
            self.fsname = parent.fsname
            self.name = parent.name
            return

        # store
        self.parent = parent

        # config, either as given, or copy from parent
        if config:
            self.config = config

        elif parent:
            self.config = parent.config

        else:
            # XXX: no config
            self.config = None

        # fsname, either as given, or encoded from the unicode name
        if fsname:
            self.fsname = fsname

        else:
            self.fsname = self.encode_name(name)

        # name, either as given, or decoded from the binary fsname
        if name:
            self.name = unicode(name)

        else:
            self.name = self.decode_fsname(fsname)

    def subnode(self, name):
        """
        Returns a Node object representing the given name behind this node.

        The name should either be a plain ASCII string or unicode object.

        >>> Node(Root('/'), 'foo').subnode('bar')
        Node('/foo', 'bar')
        """

        return Node(self, name=name)

    def nodepath(self):
        """
        Returns the path of nodes from this node to the root node, inclusive

        >>> list(Node(Root('/'), 'foo').subnode('bar').nodepath())
        [Root('/'), Node('/', 'foo'), Node('/foo', 'bar')]

        XXX: circular reference hell?
        """

        # recursive generator: ancestors first, then ourself
        for node in self.parent.nodepath():
            yield node

        yield self

    # NOTE(review): lazy_load comes from the project-local `utils` module; it is
    # used here like a property (accessed without a call) - presumably caching
    # the computed value, confirm against utils.lazy_load.
    @lazy_load
    def path(self):
        """
        Return the machine-readable root-path for this node

        >>> Node(Root('/'), 'foo').subnode('bar').path
        '/foo/bar'
        >>> Node(Root('/'), name=u'foo').path
        '/foo'
        >>> Node(Root('/'), fsname='\\x01\\x02').path
        '/\\x01\\x02'
        """

        # build using parent path and our fsname
        # XXX: rewrite using nodepath?
        return os.path.join(self.parent.path, self.fsname)

    @lazy_load
    def unicodepath(self):
        """
        Return the human-readable root-path for this node

        >>> Node(Root('/'), 'foo').subnode('bar').unicodepath
        u'/foo/bar'
        >>> Node(Root('/'), name=u'foo').unicodepath
        u'/foo'
        >>> Node(Root('/'), fsname='\\x01\\x02').unicodepath
        u'/\\x01\\x02'
        """

        # build using parent unicodepath and our name; joining against the
        # parent's *binary* path would mix str and unicode components
        # XXX: rewrite using nodepath?
        return os.path.join(self.parent.unicodepath, self.name)

    def path_segments(self, unicode=True):
        """
        Return a series of single-level names describing the path from the root to this node.

        If `unicode` is given, then the returned items will be the unicode names, otherwise, the binary names.

        >>> list(Node(Root('/'), 'foo').subnode('bar').path_segments())
        [u'/', u'foo', u'bar']
        >>> list(Node(Root('/'), 'foo').subnode('bar').path_segments(unicode=False))
        ['/', 'foo', 'bar']
        """

        # parent segments first
        for segment in self.parent.path_segments(unicode=unicode):
            yield segment

        yield self.name if unicode else self.fsname

    @lazy_load
    def _stat(self):
        """
        Cached low-level stat.

        Returns None on ENOENT (node doesn't exist).
        """

        try:
            # syscall
            return os.stat(self.path)

        except OSError as e:
            # trap ENOENT for soft
            if e.errno == errno.ENOENT:
                return None

            # anything else (EACCES, ENOTDIR, ...) is a real error
            raise

    def stat(self, soft=False):
        """
        Returns the os.stat struct for this node.

        If `soft` is given, returns None if this node doesn't exist, otherwise a NodeErrno(ENOENT) is raised.

        The result comes from the cached `_stat` lookup.

        >>> Root('/').stat() is not None
        True
        >>> Root('/nonexistant').stat(soft=True) is None
        True
        """

        st = self._stat

        if st:
            # got it
            return st

        if soft:
            # doesn't exist
            return None

        # not found
        raise NodeErrno(self, errno.ENOENT)

    def exists(self):
        """
        Tests if this node exists on the physical filesystem

        >>> Node(Root('.'), '.').exists()
        True
        >>> Node(Root('/'), 'nonexistant').exists()
        False
        """

        return self._stat is not None

    def is_dir(self):
        """
        Tests if this node represents a directory on the physical filesystem.

        Returns False for non-existant files.

        >>> Node(Root('/'), '.').is_dir()
        True
        >>> Root('/').subnode('dev').subnode('null').is_dir()
        False
        """

        return stat.S_ISDIR(self._stat.st_mode) if self._stat else False

    def is_file(self):
        """
        Tests if this node represents a normal file on the physical filesystem

        Returns False for non-existant files.

        >>> Node(Root('/'), '.').is_file()
        False
        >>> Root('/').subnode('dev').subnode('null').is_file()
        False
        """

        return stat.S_ISREG(self._stat.st_mode) if self._stat else False

    def test(self):
        """
        Tests that this node exists. Raises an error if not, otherwise, returns the node itself
        """

        if not self.exists():
            raise NodeErrno(self, errno.ENOENT)

        return self

    def path_to(self, node):
        """
        Returns a relative path from this node to the given node

        XXX: doctests
        """

        # get real paths for both
        from_path = list(self.nodepath())
        to_path = list(node.nodepath())
        pivot = None

        # reduce common prefix, remembering the last shared node
        while from_path and to_path and from_path[0] == to_path[0]:
            from_path.pop(0)
            pivot = to_path.pop(0)

        # full path: up from ourself, through the pivot, down to the target
        path = itertools.chain(reversed(from_path), [pivot] if pivot else (), to_path)

        # build path
        return Path(*path)

    def path_from(self, node):
        """
        Returns a relative path to this node from the given node.

        This is the same as path_to, but just reversed.
        """

        return node.path_to(self)

    def __str__(self):
        return self.path

    def __unicode__(self):
        return self.unicodepath

    def __repr__(self):
        # parentless nodes (e.g. cloned from a Root) have no parent path to show
        parent_path = self.parent.path if self.parent is not None else None

        return "Node(%r, %r)" % (parent_path, self.fsname)

    def __eq__(self, other):
        """
        A Node is equal if compared to another node that shares the same name, and the parents are also equal.
        """

        return isinstance(other, Node) and self.name == other.name and self.parent == other.parent

    def __cmp__(self, other):
        """
        Compare two Nodes or with None. This does not support comparisons with other kinds of objects.

        Nodes are ordered by parent first, then by name.

        >>> cmp(Node(None, 'foo'), Node(None, 'foo'))
        0
        >>> cmp(Node(None, 'aaa'), Node(None, 'bbb'))
        -1
        >>> cmp(Node(None, 'bbb'), Node(None, 'aaa'))
        1

        >>> cmp(Node(Node(None, 'a'), 'aa'), Node(Node(None, 'a'), 'aa'))
        0
        >>> cmp(Node(Node(None, 'a'), 'aa'), Node(Node(None, 'a'), 'ab'))
        -1
        >>> cmp(Node(Node(None, 'a'), 'ab'), Node(Node(None, 'a'), 'aa'))
        1

        >>> cmp(Node(Node(None, 'a'), 'zz'), Node(Node(None, 'b'), 'aa'))
        -1
        >>> cmp(Node(Node(None, 'a'), 'aa'), Node(Node(None, 'b'), 'zz'))
        -1
        >>> cmp(Node(Node(None, 'z'), 'aa'), Node(Node(None, 'a'), 'zz'))
        1
        """

        if other is None:
            # arbitrary...
            return 1

        # compare against the other node's own parent on both sides, so that
        # cmp(a, b) and cmp(b, a) always agree with each other
        return cmp((self.parent, self.name), (other.parent, other.name))


class Path(object):
    """
    A Path is a sequence of Nodes that form a path through a Node tree rooted at some Root.

    Each node must either be the parent or the child of the following node.

    The first and last nodes may be Files, but all other objects must be Directories.

    XXX: better to keep Paths symbolic/relative?
    XXX: welcome to Circular Reference Hell, a place has been reserved for you
    """

    def __init__(self, *nodes):
        """
        Initialize with the given node path.

        The node path must not be empty.
        """

        self.nodes = nodes

    def subpath(self, *nodes):
        """
        Returns a new path with the given node(s) appended
        """

        return Path(*itertools.chain(self.nodes, nodes))

    def path_segments(self, unicode=True):
        """
        Yields a series of physical path segments for this path.

        File -> Directory :
            file.parent == dir  -> nothing

        Directory -> Directory :
            dir_1.parent == dir_2   -> '..'
            dir_1 == dir_2.parent   -> dir_2.name

        Directory -> File :
            file.parent == dir  -> file.name

        A path consisting of a single Directory yields '.'.

        >>> root = Root('root'); Path(root, root.subfile('foo'))
        Path('foo')
        >>> root = Root('root'); Path(root, root.subdir('foo'), root.subdir('foo').subfile('bar'))
        Path('foo', 'bar')
        >>> root = Root('root'); Path(root.subfile('foo'), root)
        Path('.')
        >>> root = Root('root'); Path(root.subfile('foo'), root, root.subfile('bar'))
        Path('bar')
        >>> root = Root('root'); Path(root.subfile('foo'))
        Path('foo')
        """

        # XXX: this logic should be implemented as methods in Node

        # `node` is pre-bound so that an empty path simply yields nothing
        # instead of failing with a NameError after the loop
        prev = prev_last = node = None

        # output directory components
        for node in self.nodes:
            if not prev:
                # ignore the first item for now
                pass

            elif isinstance(prev, File):
                # going from a file to its dir doesn't require anything
                assert isinstance(node, Directory) and prev.parent == node

            elif isinstance(node, File):
                # final target, must come from a directory
                assert node is self.nodes[-1] and (not prev or (isinstance(prev, Directory) and node.parent == prev))

            elif prev.parent == node:
                # going from a dir into the dir above it
                yield '..'

            elif node.parent == prev:
                # going from a dir into a dir underneath it
                yield node.name if unicode else node.fsname

            else:
                raise Exception("invalid path: %r" % (self.nodes, ))

            # chained together
            prev_last = prev
            prev = node

        # output final file/lone dir component
        if isinstance(node, File):
            # the last/only node is the final file target and must *always* be output
            yield node.name if unicode else node.fsname

        elif isinstance(node, Directory) and (prev_last is None or isinstance(prev_last, File)):
            # a lone directory has no predecessor to validate against
            assert prev_last is None or prev_last.parent == node

            # going from a file into its own directory (or a lone directory) is a direct reference
            yield '.'

    def __iter__(self):
        """
        Iterate over the nodes
        """

        return iter(self.nodes)

    def __unicode__(self):
        """
        Returns the unicode human-readable path
        """

        return os.path.join(*self.path_segments(unicode=True))

    def __str__(self):
        """
        Returns the binary machine-readable path
        """

        return os.path.join(*self.path_segments(unicode=False))

    def __repr__(self):
        return "Path(%s)" % ', '.join(repr(segment) for segment in self.path_segments(unicode=False))


class File(Node):
    """
    A file. Simple, eh?
    """

    @property
    def basename(self):
        """
        Returns the "base" part of this file's name, i.e. the filename without the extension
        """

        basename, _ = os.path.splitext(self.name)

        return basename

    @property
    def fileext(self):
        """
        Returns the file extension part of the file's name, without any leading dot
        """

        _, fileext = os.path.splitext(self.name)

        # strip leading .
        return fileext[1:]

    def matchext(self, ext_list):
        """
        Tests if this file's extension is part of the recognized list of extensions.

        The extension is lowercased before the lookup.
        """

        return (self.fileext.lower() in ext_list)

    def test(self):
        """
        Tests that this file exists as a file. Raises an error if not, otherwise, returns itself
        """

        if not self.is_file():
            raise Exception("File does not exist: %s" % self)

        return self

    def open(self, mode='r', encoding=None, errors=None, bufsize=None):
        """
        Wrapper for open/codecs.open.

        If `encoding` is given, the file is opened using codecs.open, otherwise using the builtin open
        (in which case `errors` is ignored).

        `errors` and `bufsize` are only passed on when given, so that the underlying defaults apply otherwise.
        """

        if encoding:
            # codecs.open does not accept None for errors/buffering
            kwargs = {}

            if errors is not None:
                kwargs['errors'] = errors

            if bufsize is not None:
                kwargs['buffering'] = bufsize

            return codecs.open(self.path, mode, encoding, **kwargs)

        if bufsize is not None:
            return open(self.path, mode, bufsize)

        return open(self.path, mode)

    def open_read(self, *args, **kwargs):
        """ Open for read using open('r') """

        return self.open('r', *args, **kwargs)

    def open_write(self, *args, **kwargs):
        """ Open for write using open('w') """

        return self.open('w', *args, **kwargs)

    def copy_from(self, file):
        """
        Replace this file with a copy of the given file with default permissions.

        XXX: accept mode
        """

        # perform the copy
        shutil.copyfile(file.path, self.path)

    def newer_than(self, file):
        """
        Returns True if both files exist, and this file is newer than the given file.

        Returns False if both exist but this file is not newer, and None if either file does not exist.
        """

        if self._stat and file._stat:
            return self._stat.st_mtime > file._stat.st_mtime

        else:
            return None

    def older_than(self, file):
        """
        Returns True if both files exist, and this file is older than the given file.
        """

        # mirror
        return file.newer_than(self)


class Directory(Node):
    """
    A directory is a node that contains other nodes.
    """

    # a list of (test_func, node_type) tuples for use by children() to build subnodes with
    NODE_TYPES = None

    def subdir(self, name, create=False):
        """
        Returns a Directory object representing the name underneath this dir.

        If the create option is given, the directory will be created if it does not exist.
        """

        subdir = Directory(self, name=name)

        if create and not subdir.is_dir():
            # create it!
            subdir.mkdir()

        return subdir

    def subfile(self, name):
        """
        Returns a File object representing the name underneath this dir
        """

        return File(self, name=name)

    def test(self):
        """
        Tests that this dir exists as a dir. Raises an error if not, otherwise, returns itself
        """

        if not self.is_dir():
            raise Exception("Directory does not exist: %s" % self)

        return self

    def mkdir(self):
        """
        Create this directory with default permissions.

        XXX: mode argument
        """

        # do it
        os.mkdir(self.path)

    def listdir(self, skip_dotfiles=True):
        """
        Yield a series of raw fsnames for nodes in this dir
        """

        # expressed
        return (fsname for fsname in os.listdir(self.path) if not (skip_dotfiles and fsname.startswith('.')))

    def subnodes(self, skip_dotfiles=True, sort=True):
        """
        Yield a series of Nodes contained in this dir.

        If skip_dotfiles is given, nodes that begin with a . are omitted.

        If `sort` is given, the returned nodes will be in sorted order (as a list), otherwise a lazy generator
        is returned.
        """

        nodes = (Node(self, fsname) for fsname in self.listdir(skip_dotfiles))

        if sort:
            return sorted(nodes)

        return nodes

    def __iter__(self):
        """
        Iterate over the (sorted, non-dotfile) subnodes of this dir.
        """

        # subnodes() returns a plain list when sorting, but __iter__ must return a real iterator
        return iter(self.subnodes())

    @property
    def root_path(self):
        """
        Build and return a relative path to the root of this dir tree

        XXX: move to node
        """

        # build using parent root_path
        return os.path.join('..', self.parent.root_path)


class Root(Directory):
    """
    A special Directory that overrides the Node methods to anchor the recursion/etc at some 'real' filesystem path.
    """

    # XXX: config needs a default
    def __init__(self, fspath, config=None):
        """
        Construct the directory tree root at the given 'real' path, which must be a raw str
        """

        # abuse Node's concept of a "name" a bit
        super(Root, self).__init__(None, fspath, config=config)

    def nodepath(self):
        """
        Just return ourself
        """

        return [self]

    @property
    def path(self):
        """
        Returns the raw path
        """

        return self.fsname

    @property
    def unicodepath(self):
        """
        Returns the raw decoded path
        """

        return self.name

    @property
    def root_path(self):
        """
        Returns an empty string representing this dir
        """

        return ''

    def path_segments(self, unicode=True):
        """
        No path segments other than our own
        """

        yield self.name if unicode else self.fsname

    def __repr__(self):
        """
        Override Node.__repr__ to not use self.parent.path
        """

        return "Root(%r)" % self.fsname


# testing
if __name__ == '__main__':
    import doctest

    doctest.testmod()