scripts/pvlbackup-rsync-snapshot
changeset 12 fbfdde7326f4
child 14 2a7b87dc6c45
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/pvlbackup-rsync-snapshot	Tue Feb 14 21:51:30 2012 +0200
@@ -0,0 +1,429 @@
+#!/usr/bin/python
+
+"""
+    Manage rsync --link-dest based snapshots.
+
+    Rsyncs from <src> to <dst>/snapshots/YYYYMMDD-HHMMSS using --link-dest <dst>/current.
+
+    Updates symlink <dst>/current -> <dst>/snapshots/...
+
+    Then links the new snapshot into each enabled interval, as a symlink <dst>/<interval>/<date> -> ../snapshots/...
+"""
+
+from pvl.backup import rsync
+
+import optparse
+import os, os.path, stat
+import shutil
+import datetime
+import logging
+
+# root logger; its level and format are configured by parse_options()
+log = logging.getLogger()
+
+# parsed command-line options; assigned by main()
+options = None
+
+def parse_options (argv) :
+    """
+        Parse command-line arguments.
+    """
+
+    parser = optparse.OptionParser(
+            prog        = argv[0],
+            usage       = '%prog: [options] --source <src> --destination <dst>',
+
+            # module docstring
+            # XXX: breaks multi-line descriptions..
+            description = __doc__,
+    )
+
+    # logging
+    general = optparse.OptionGroup(parser, "General Options")
+
+    general.add_option('-q', '--quiet',      dest='loglevel', action='store_const', const=logging.WARNING, help="Less output")
+    general.add_option('-v', '--verbose',    dest='loglevel', action='store_const', const=logging.INFO,  help="More output")
+    general.add_option('-D', '--debug',      dest='loglevel', action='store_const', const=logging.DEBUG, help="Even more output")
+
+    parser.add_option_group(general)
+
+    #
+    parser.add_option('-s', '--source',     metavar='RSYNC-PATH',
+        help="Backup source in rsync-syntax")
+
+    parser.add_option('-d', '--destination',    metavar='RSYNC-PATH',
+        help="Backup destination in rsync-syntax")
+
+    parser.add_option('--interval',         metavar='NAME', action='append', dest='intervals',
+        help="Enable given interval")
+
+    parser.add_option('--clean-intervals',  action='store_true',
+        help="Clean out old interval links")
+
+    parser.add_option('--clean-snapshots',  action='store_true',
+        help="Clean out unused snapshots (those not linked to)")
+
+    parser.add_option('--clean',             action='store_true',
+        help="Clean out both intervals and snapshots")
+
+    parser.add_option('-n', '--dry-run',    action='store_true',
+        help="Don't actually clean anything")
+
+    # defaults
+    parser.set_defaults(
+        loglevel    = logging.WARNING,
+        
+        snapshot_format = '%Y%m%d-%H%M%S',
+
+        ## XXX: configure somehow
+        # rsync options, in invoke.optargs format
+        rsync_options = {
+            'archive':          True,
+            'hard-links':       True,
+            'one-file-system':  True,
+            'numeric-ids':      True,
+            'delete':           True,
+        },
+
+        # datetime formats for intervals
+        interval_format = {
+            'all':      None,       # default to snapshot_format
+            'day':      '%Y-%m-%d',
+            'week':     '%Y-%W',
+            'month':    '%Y-%m',
+            'year':     '%Y',
+        },
+
+        # retention for intervals
+        interval_retention = {
+            'all':      4,
+            'day':      7,
+            'week':     4,
+            'month':    4,
+            'year':     1,
+        },
+
+        # selected intervals
+        intervals       = [],
+    )
+
+    # parse
+    options, args = parser.parse_args(argv[1:])
+
+    # validate
+    if not options.destination :
+        parser.error("--destination is required")
+
+    # configure
+    logging.basicConfig(
+        format  = '%(processName)s: %(name)s: %(levelname)s %(funcName)s : %(message)s',
+        level   = options.loglevel,
+    )
+
+    if options.clean :
+        options.clean_intervals = options.clean_snapshots = options.clean
+
+    return options, args
+
+def run_snapshot (options) :
+    """
+        Perform the rsync from source to given path.
+    """
+
+    snapshot_dir = os.path.join(options.destination, 'snapshots')
+
+    if not os.path.exists(snapshot_dir) :
+        log.warn("Creating snapshots dir: %s", snapshot_dir)
+        os.mkdir(snapshot_dir)
+    
+    # new snapshot
+    snapshot_name = options.now.strftime(options.snapshot_format)
+    snapshot_path = os.path.join(snapshot_dir, snapshot_name)
+    temp_path = os.path.join(snapshot_dir, 'new')
+
+    if os.path.exists(temp_path) :
+        raise Exception("Old temp snapshot dir remains, please clean up: {path}".format(path=temp_path))
+
+    log.info("Perform main snapshot: %s", snapshot_path)
+
+    # build rsync options
+    opts = dict(options.rsync_options)
+
+    if os.path.exists(options.current_path) :
+        # use as link-dest base; hardlinks unchanged files
+        opts['link-dest'] = options.current_path
+
+    # go
+    log.debug("rsync %s -> %s", options.source, temp_path)
+    rsync.rsync(options.source, temp_path, **opts)
+
+    # move in to final name
+    log.debug("rename %s -> %s", temp_path, snapshot_path)
+    os.rename(temp_path, snapshot_path)
+
+    return snapshot_name
+
+def update_interval (options, snapshot_name, interval) :
+    """
+        Update the interval/... links
+    """
+
+    dir_path = os.path.join(options.destination, interval)
+
+    if not os.path.exists(dir_path) :
+        log.warn("Creating interval dir: %s", dir_path)
+        os.mkdir(dir_path)
+    
+    # format code
+    name_fmt = options.interval_format[interval]
+
+    if name_fmt is None :
+        # keep all snapshots
+        name_fmt = options.snapshot_format
+
+    # name
+    name = options.now.strftime(name_fmt)
+
+    # path
+    path_name = os.path.join(interval, name)
+    path = os.path.join(options.destination, path_name)
+
+    log.debug("processing %s", path_name)
+
+    # already there?
+    if os.path.exists(path) :
+        target = os.readlink(path)
+
+        log.info("Found existing %s: %s -> %s", interval, name, target)
+
+    else :
+        # update
+        target = os.path.join('..', 'snapshots', snapshot_name)
+
+        log.info("Updating %s: %s -> %s", interval, name, target)
+        log.debug("%s -> %s", path, target)
+
+        os.symlink(target, path)
+
+
+def clean_interval (options, interval) :
+    """
+        Clean out old entries from interval dir.
+    """
+
+    # path
+    dir_path = os.path.join(options.destination, interval)
+
+    if not os.path.exists(dir_path) :
+        log.warn("%s: Skipping, no interval dir: %s", interval, dir_path)
+        return
+
+    # configured
+    retention = options.interval_retention[interval]
+
+    # clean?
+    items = os.listdir(dir_path)
+    items.sort()
+
+    log.info("%s: Have %d / %d items", interval, len(items), retention)
+    log.debug("%s: items: %s", interval, ' '.join(items))
+
+    if len(items) > retention :
+        # clean out
+        clean = items[retention:]
+
+        log.info("%s: Cleaning out %d items", interval, len(clean))
+        log.debug("%s: cleaning out: %s", interval, ' '.join(clean))
+
+        for item in clean :
+            path = os.path.join(dir_path, item)
+
+            log.info("%s: Clean: %s", interval, path)
+
+            os.unlink(path)
+
+def walk_symlinks (tree, ignore=False) :
+    """
+        Walk through all symlinks in given dir, yielding:
+
+            (dirpath, name, target)
+
+        Passes through errors from os.listdir/os.lstat.
+    """
+
+    for name in os.listdir(tree) :
+        if ignore and name in ignore :
+            log.debug("%s: ignore: %s", tree, name)
+            continue
+
+        path = os.path.join(tree, name)
+        
+        # stat symlink itself
+        st = os.lstat(path)
+
+        if stat.S_ISDIR(st.st_mode) :
+            # recurse
+            log.debug("%s: tree: %s", tree, name)
+
+            for item in walk_symlinks(path) :
+                yield item
+
+        elif stat.S_ISLNK(st.st_mode) :
+            # found
+            target = os.readlink(path)
+
+            log.debug("%s: link: %s -> %s", tree, name, target)
+
+            yield tree, name, target
+
+        else :
+            log.debug("%s: skip: %s", tree, name)
+
+
+def clean_snapshots (options) :
+    """
+        Clean out all snapshots not linked to from within dest.
+
+        Fails without doing anything if unable to read the destination dir.
+    """
+
+    # real path to snapshots
+    snapshots_path = os.path.realpath(os.path.abspath(os.path.join(options.destination, 'snapshots')))
+    log.debug("real snapshots_path: %s", snapshots_path)
+
+    # set of found targets
+    found = set()
+
+    # walk all symlinks
+    for dirpath, name, target in walk_symlinks(options.destination, ignore=set(['snapshots'])) :
+        # target dir
+        target_path = os.path.realpath(os.path.join(dirpath, target))
+        target_dir = os.path.dirname(target_path)
+        target_name = os.path.basename(target_path)
+
+        if target_dir == snapshots_path :
+            log.debug("%s: found: %s -> %s", dirpath, name, target_name)
+            found.add(target_name)
+
+        else :
+            log.debug("%s: ignore: %s -> %s", dirpath, name, target_path)
+
+    # discover all snapshots
+    snapshots = set(os.listdir(snapshots_path))
+
+    # clean out special names
+    snapshots = snapshots - set(['new'])
+
+    ## compare
+    used = snapshots & found
+    unused = snapshots - found
+    broken = found - snapshots
+
+    log.info("Found used=%d, unused=%d, broken=%d snapshot symlinks", len(used), len(unused), len(broken))
+    log.debug("used=%s, unused=%s", used, unused)
+
+    if broken :
+        log.warn("Found broken symlinks to snapshots: %s", ' '.join(broken))
+    
+    if unused :
+        log.info("Clean out unused snapshots: %s", ' '.join(unused))
+
+        for name in unused :
+            path = os.path.join(snapshots_path, name)
+
+            log.info("Clean: %s", name)
+
+            if not options.dry_run :
+                log.debug("rmtree: %s", path)
+
+                # nuke
+                shutil.rmtree(path)
+
+            else :
+                log.debug("dry-run: %s", path)
+
+def run (options) :
+    """
+        Perform the current snapshot
+    """
+
+    # timestamp for run
+    options.now = datetime.datetime.now()
+
+    # snapshot from source?
+    if options.source :
+        # base snapshot (symlink)
+        options.current_path = os.path.join(options.destination, 'current')
+
+        log.info("Started snapshot run at: %s", options.now)
+
+        # initial rsync
+        snapshot_name = run_snapshot(options)
+
+        # update current
+        log.info("Updating current -> %s", snapshot_name)
+
+        if os.path.islink(options.current_path) :
+            # replace
+            os.unlink(options.current_path)
+
+        os.symlink(os.path.join('snapshots', snapshot_name), options.current_path)
+
+        # intervals?
+        if not options.intervals :
+            log.info("No --intervals given; not running any")
+
+        else :
+            # maintain intervals
+            log.info("Running intervals: %s", options.intervals)
+
+            for interval in options.intervals :
+                log.debug("%s", interval)
+
+                # update
+                update_interval(options, snapshot_name, interval)
+
+    # clean intervals?
+    if options.clean_intervals:
+        for interval in options.intervals :
+            log.info("Cleaning interval: %s...", interval)
+
+            clean_interval(options, interval)
+
+    # clean snapshots?
+    if options.clean_snapshots :
+        log.info("Cleaning snapshots...")
+
+        clean_snapshots(options)
+
+    # ok
+    return 1
+
+def main (argv) :
+    global options
+
+    # global options + args
+    options, args = parse_options(argv)
+
+    # XXX: args?
+    if args :
+        log.error("No arguments are handled")
+        return 2
+
+    try :
+        # handle it
+        return run(options)
+
+    except Exception, e:
+        log.error("Internal error:", exc_info=e)
+        return 3
+
+    # ok
+    return 0
+
+
+
+# script entry point: the return value of main() becomes the process exit status
+if __name__ == '__main__' :
+    import sys
+
+    sys.exit(main(sys.argv))
+