diff -r 2a7b87dc6c45 -r 61f6d0ca0432 scripts/pvlbackup-rsync-snapshot --- a/scripts/pvlbackup-rsync-snapshot Tue Feb 14 22:16:43 2012 +0200 +++ b/scripts/pvlbackup-rsync-snapshot Wed Feb 15 13:58:35 2012 +0200 @@ -12,7 +12,7 @@ from pvl.backup import rsync -import optparse +import optparse, ConfigParser import os, os.path, stat import shutil import datetime @@ -23,7 +23,7 @@ # command-line options options = None -def parse_options (argv) : +def parse_options (argv, defaults) : """ Parse command-line arguments. """ @@ -57,16 +57,7 @@ parser.add_option_group(rsync) - # - parser.add_option('-s', '--source', metavar='RSYNC-PATH', - help="Backup source in rsync-syntax") - - parser.add_option('-d', '--destination', metavar='RSYNC-PATH', - help="Backup destination in rsync-syntax") - - parser.add_option('--interval', metavar='NAME', action='append', dest='intervals', - help="Enable given interval") - + # global parser.add_option('--clean-intervals', action='store_true', help="Clean out old interval links") @@ -79,51 +70,32 @@ parser.add_option('-n', '--dry-run', action='store_true', help="Don't actually clean anything") + # + parser.add_option('-c', '--config', metavar='FILE', + help="Load configuration file") + + + # + parser.add_option('-s', '--source', metavar='RSYNC-PATH', + help="Backup source in rsync-syntax") + + parser.add_option('-d', '--destination', metavar='RSYNC-PATH', + help="Backup destination in rsync-syntax") + + parser.add_option('--interval', metavar='NAME', action='append', dest='target_intervals', + help="Enable given interval") + + # defaults parser.set_defaults( loglevel = logging.WARNING, - - snapshot_format = '%Y%m%d-%H%M%S', - - ## XXX: configure somehow - # rsync options, in invoke.optargs format - rsync_options = { - 'archive': True, - 'hard-links': True, - 'one-file-system': True, - 'numeric-ids': True, - 'delete': True, - }, + ) + parser.set_defaults(**defaults) - # datetime formats for intervals - interval_format = { - 'recent': None, # default to snapshot_format - 'day': '%Y-%m-%d', - 'week': '%Y-%W', - 'month': '%Y-%m', - 'year': '%Y', - }, - - # retention for intervals - interval_retention = { - 'recent': 4, - 'day': 7, - 'week': 4, - 'month': 4, - 'year': 1, - }, - - # selected intervals - intervals = [], - ) - + # parse options, args = parser.parse_args(argv[1:]) - # validate - if not options.destination : - parser.error("--destination is required") - # configure logging.basicConfig( format = '%(processName)s: %(name)s: %(levelname)s %(funcName)s : %(message)s', @@ -141,130 +113,78 @@ return options, args -def run_snapshot (options) : - """ - Perform the rsync from source to given path. +def process_config_name (name) : """ - - snapshot_dir = os.path.join(options.destination, 'snapshots') - - if not os.path.exists(snapshot_dir) : - log.warn("Creating snapshots dir: %s", snapshot_dir) - os.mkdir(snapshot_dir) - - # new snapshot - snapshot_name = options.now.strftime(options.snapshot_format) - snapshot_path = os.path.join(snapshot_dir, snapshot_name) - temp_path = os.path.join(snapshot_dir, 'new') - - if os.path.exists(temp_path) : - raise Exception("Old temp snapshot dir remains, please clean up: {path}".format(path=temp_path)) - - log.info("Perform main snapshot: %s", snapshot_path) - - # build rsync options - opts = dict(options.rsync_options) - - if os.path.exists(options.current_path) : - # real path to target - target = os.readlink(options.current_path) - target_path = os.path.join(os.path.dirname(options.current_path), target) - target_abs = os.path.abspath(target_path) - - log.info("Using current -> %s as base", target_path) - - # use as link-dest base; hardlinks unchanged files; target directory must be empty - # rsync links absolute paths.. - opts['link-dest'] = target_abs - - # go - log.debug("rsync %s -> %s", options.source, temp_path) - rsync.rsync(options.source, temp_path, **opts) - - # move in to final name - log.debug("rename %s -> %s", temp_path, snapshot_path) - os.rename(temp_path, snapshot_path) - - return snapshot_name - -def update_interval (options, snapshot_name, interval) : - """ - Update the interval/... links + Process config file name into python version """ - dir_path = os.path.join(options.destination, interval) - - if not os.path.exists(dir_path) : - log.warn("Creating interval dir: %s", dir_path) - os.mkdir(dir_path) - - # format code - name_fmt = options.interval_format[interval] - - if name_fmt is None : - # keep all snapshots - name_fmt = options.snapshot_format + return name.replace('-', '_') - # name - name = options.now.strftime(name_fmt) - - # path - path_name = os.path.join(interval, name) - path = os.path.join(options.destination, path_name) +def parse_config (path, defaults) : + """ + Parse given config file + """ - log.debug("processing %s", path_name) + log.debug("loading config: %s", path) - # already there? - if os.path.exists(path) : - target = os.readlink(path) + config = dict(defaults) + config_file = ConfigParser.RawConfigParser() + config_file.read([path]) - log.info("%s: Found existing: %s -> %s", interval, name, target) + # handle each section + for section in config_file.sections() : + # mangle + section_name = process_config_name(section) + + log.debug("section: %s", section_name) + + # subsections + section_path = section_name.split(':') + + # lookup dict + lookup = config + + for name in section_path : + if name not in lookup : + lookup[name] = {} + + lookup = lookup[name] + + # found dict for this section + config_section = lookup + + # values + for name, value in config_file.items(section) : + # mangle + name = process_config_name(name) + + log.debug("section: %s: %s = %s", '/'.join(section_path), name, value) + + config_section[name] = value + + log.debug("config: %s", config) + + return config + +def config_bool (name, value) : + if value.lower() in ('yes', 'true', '1', 'on') : + return True + + elif value.lower() in ('no', 'false', '0', 'off') : + return False else : - # update - target = os.path.join('..', 'snapshots', snapshot_name) - - log.info("%s: Updating: %s -> %s", interval, name, target) - log.debug("%s -> %s", path, target) - - os.symlink(target, path) - - -def clean_interval (options, interval) : - """ - Clean out old entries from interval dir. - """ - - # path - dir_path = os.path.join(options.destination, interval) - - if not os.path.exists(dir_path) : - log.warn("%s: Skipping, no interval dir: %s", interval, dir_path) - return + raise ConfigError("Unrecognized boolean value: {name} = {value}".format(name=name, value=value)) - # configured - retention = options.interval_retention[interval] - - # clean? - items = os.listdir(dir_path) - items.sort() - - log.info("%s: Have %d / %d items", interval, len(items), retention) - log.debug("%s: items: %s", interval, ' '.join(items)) +def config_int (name, value) : + try : + return int(value) - if len(items) > retention : - # clean out - clean = items[retention:] + except ValueError, e: + raise ConfigError("Invalid integer value: {name} = {value}".format(name=name, value=value)) - log.info("%s: Cleaning out %d items", interval, len(clean)) - log.debug("%s: cleaning out: %s", interval, ' '.join(clean)) - - for item in clean : - path = os.path.join(dir_path, item) - - log.info("%s: Clean: %s", interval, path) - - os.unlink(path) +def config_list (name, value) : + return value.split() def walk_symlinks (tree, ignore=False) : """ @@ -304,132 +224,473 @@ log.debug("%s: skip: %s", tree, name) -def clean_snapshots (options) : +class Interval (object) : """ - Clean out all snapshots not linked to from within dest. - - Fails without doing anything if unable to read the destination dir. + An interval definition. """ - # real path to snapshots - snapshots_path = os.path.realpath(os.path.abspath(os.path.join(options.destination, 'snapshots'))) - log.debug("real snapshots_path: %s", snapshots_path) - - # set of found targets - found = set() + @classmethod + def from_config (cls, options, name, + format, + keep, + ) : + if not format : + # magic to use snapshot name + _format = None + else : + _format = format - # walk all symlinks - for dirpath, name, target in walk_symlinks(options.destination, ignore=set(['snapshots'])) : - # target dir - target_path = os.path.realpath(os.path.join(dirpath, target)) - target_dir = os.path.dirname(target_path) - target_name = os.path.basename(target_path) + return cls(name, + format = _format, + keep = config_int('keep', keep), + ) - if target_dir == snapshots_path : - log.debug("%s: found: %s -> %s", dirpath, name, target_name) - found.add(target_name) + def __init__ (self, name, format, keep) : + self.name = name + self.format = format + self.keep = keep + + def __str__ (self) : + return self.name + +class Target (object) : + """ + A target run, i.e. a rsync-snapshot destination dir + + [target:...] + """ + + @classmethod + def from_config (cls, options, name, + path = False, + source = None, + enable = 'no', + intervals = None, + exclude_from = None, + + # subsections + rsync_options = None, + ) : + if not source : + raise ConfigError("Missing required option: source for [target:{name}]".format(name=name)) + + # global defaults + _rsync_options = dict(options.rsync_options) + + if rsync_options : + # override + _rsync_options.update(rsync_options) + + # lookup intervals + _intervals = [options.intervals[name] for name in config_list('intervals', intervals)] + + return cls(name, + path = path if path else name, + source = source, + enable = config_bool('enable', enable), + intervals = _intervals, + rsync_options = _rsync_options, + exclude_from = exclude_from, + ) + + def __init__ (self, name, + path, + source, + enable = False, + intervals = [], + rsync_options = {}, + exclude_from = None + ) : + self.name = name + + self.path = path + self.source = source + self.enable = enable + + self.intervals = intervals + + self.rsync_options = rsync_options + self.exclude_from = exclude_from + + # this snapshot? + self.snapshots_dir = os.path.join(self.path, 'snapshots') + + # 'current' symlink + self.current_path = os.path.join(self.path, 'current') + + def snapshot (self, options, now) : + """ + Perform the rsync from our source to self.snapshot_dir. + + XXX: allocate snapshot_name here? + """ + + if not os.path.exists(self.snapshots_dir) : + log.warn("Creating snapshots dir: %s", self.snapshots_dir) + os.mkdir(self.snapshots_dir) + + # new snapshot + snapshot_name = now.strftime(options.snapshot_format) + snapshot_path = os.path.join(self.snapshots_dir, snapshot_name) + temp_path = os.path.join(self.snapshots_dir, 'tmp') + + if os.path.exists(temp_path) : + raise Exception("Old temp snapshot dir remains, please clean up: {path}".format(path=temp_path)) + + log.info("Perform main snapshot: %s", snapshot_path) + + # build rsync options + opts = dict(self.rsync_options) + + if os.path.exists(self.current_path) : + # real path to target + target = os.readlink(self.current_path) + target_path = os.path.join(os.path.dirname(self.current_path), target) + target_abs = os.path.abspath(target_path) + + log.info("Using current -> %s as base", target_path) + + # use as link-dest base; hardlinks unchanged files; target directory must be empty + # rsync links absolute paths.. + opts['link-dest'] = target_abs + + # go + log.debug("rsync %s -> %s", self.source, temp_path) + rsync.rsync(self.source, temp_path, **opts) + + # move in to final name + log.debug("rename %s -> %s", temp_path, snapshot_path) + os.rename(temp_path, snapshot_path) + + return snapshot_name + + def update_interval (self, options, interval, now, snapshot_name) : + """ + Update given /... links for this target, using the given new snapshot + """ + + dir_path = os.path.join(self.path, interval.name) + + if not os.path.exists(dir_path) : + log.warn("Creating interval dir: %s", dir_path) + os.mkdir(dir_path) + + + # name + if interval.format is None : + # per-snapshot + name = snapshot_name + + log.debug("%s: using snapshot_name: %s", interval, name) else : - log.debug("%s: ignore: %s -> %s", dirpath, name, target_path) - - # discover all snapshots - snapshots = set(os.listdir(snapshots_path)) - - # clean out special names - snapshots = snapshots - set(['new']) - - ## compare - used = snapshots & found - unused = snapshots - found - broken = found - snapshots - - log.info("Found used=%d, unused=%d, broken=%d snapshot symlinks", len(used), len(unused), len(broken)) - log.debug("used=%s, unused=%s", used, unused) + # by date + name = now.strftime(interval.format) + + log.debug("%s: using interval.format: %s -> %s", interval, interval.format, name) - if broken : - log.warn("Found broken symlinks to snapshots: %s", ' '.join(broken)) - - if unused : - log.info("Clean out unused snapshots: %s", ' '.join(unused)) + # path + path_name = os.path.join(interval.name, name) + path = os.path.join(self.path, path_name) - for name in unused : - path = os.path.join(snapshots_path, name) - - log.info("Clean: %s", name) + log.debug("%s: processing %s", interval, path_name) - if not options.dry_run : - log.debug("rmtree: %s", path) + # already there? + if os.path.exists(path) : + target = os.readlink(path) - # nuke - shutil.rmtree(path) + log.info("%s: Found existing: %s -> %s", interval, name, target) + + else : + # update + target = os.path.join('..', 'snapshots', snapshot_name) + + log.info("%s: Updating: %s -> %s", interval, name, target) + log.debug("%s -> %s", path, target) + + os.symlink(target, path) + + + def clean_interval (self, options, interval) : + """ + Clean out given /... dir for this target. + """ + + # path + dir_path = os.path.join(self.path, interval.name) + + if not os.path.exists(dir_path) : + log.warn("%s: Skipping, no interval dir: %s", interval, dir_path) + return + + # configured + keep = interval.keep + + # clean? + items = os.listdir(dir_path) + items.sort() + + log.info("%s: Have %d / %d items", interval, len(items), keep) + log.debug("%s: items: %s", interval, ' '.join(items)) + + if len(items) > keep : + # clean out + clean = items[keep:] + + log.info("%s: Cleaning out %d items", interval, len(clean)) + log.debug("%s: cleaning out: %s", interval, ' '.join(clean)) + + for item in clean : + path = os.path.join(dir_path, item) + + log.info("%s: Clean: %s", interval, path) + + os.unlink(path) + + + def clean_snapshots (self, options) : + """ + Clean out all snapshots for this target not linked to from within our root. + + Fails without doing anything if unable to read the destination dir. + """ + + # real path to snapshots + snapshots_path = os.path.realpath(os.path.abspath(self.snapshots_dir)) + log.debug("real snapshots_path: %s", snapshots_path) + + # set of found targets + found = set() + + # walk all symlinks + for dirpath, name, target in walk_symlinks(self.path, ignore=set(['snapshots'])) : + # target dir + target_path = os.path.realpath(os.path.join(dirpath, target)) + target_dir = os.path.dirname(target_path) + target_name = os.path.basename(target_path) + + if target_dir == snapshots_path : + log.debug("%s: found: %s -> %s", dirpath, name, target_name) + found.add(target_name) else : - log.debug("dry-run: %s", path) - -def run (options) : - """ - Perform the current snapshot - """ - - # timestamp for run - options.now = datetime.datetime.now() + log.debug("%s: ignore: %s -> %s", dirpath, name, target_path) - # clean intervals? - if options.clean_intervals: - for interval in options.intervals : - log.info("Cleaning interval: %s...", interval) - - clean_interval(options, interval) + # discover all snapshots + snapshots = set(os.listdir(snapshots_path)) - # clean snapshots? - if options.clean_snapshots : - log.info("Cleaning snapshots...") - - clean_snapshots(options) + # clean out special names + snapshots = snapshots - set(['new']) - # snapshot from source? - if options.source : - # base snapshot (symlink) - options.current_path = os.path.join(options.destination, 'current') + ## compare + used = snapshots & found + unused = snapshots - found + broken = found - snapshots - log.info("Started snapshot run at: %s", options.now) + log.info("Found used=%d, unused=%d, broken=%d snapshot symlinks", len(used), len(unused), len(broken)) + log.debug("used=%s, unused=%s", used, unused) + + if broken : + log.warn("Found broken symlinks to snapshots: %s", ' '.join(broken)) + + if unused : + log.info("Clean out unused snapshots: %s", ' '.join(unused)) + + for name in unused : + path = os.path.join(snapshots_path, name) + + log.info("Clean: %s", name) + + if not options.dry_run : + log.debug("rmtree: %s", path) + + # nuke + shutil.rmtree(path) + + else : + log.debug("dry-run: %s", path) + + def run_snapshot (self, options, now) : + """ + Run snapshot + update current. + """ # initial rsync - snapshot_name = run_snapshot(options) + snapshot_name = self.snapshot(options, now) # update current log.info("Updating current -> %s", snapshot_name) - if os.path.islink(options.current_path) : + if os.path.islink(self.current_path) : # replace - os.unlink(options.current_path) + os.unlink(self.current_path) - os.symlink(os.path.join('snapshots', snapshot_name), options.current_path) + os.symlink(os.path.join('snapshots', snapshot_name), self.current_path) - # intervals? - if not options.intervals : - log.info("No --intervals given; not running any") + return snapshot_name + + def run_intervals (self, options, now, snapshot_name) : + """ + Run our intervals. + """ + + if not self.intervals : + log.info("No intervals given; not running any") else : # maintain intervals - log.info("Updating %d intervals...", len(options.intervals)) + log.info("Updating %d intervals...", len(self.intervals)) - for interval in options.intervals : + for interval in self.intervals : log.debug("%s", interval) log.info("Updating interval: %s", interval) # update - update_interval(options, snapshot_name, interval) + self.update_interval(options, interval, now, snapshot_name) - # ok - return 1 + def run (self, options) : + """ + Execute + """ + + # clean intervals? + if options.clean_intervals: + for interval in self.intervals : + log.info("Cleaning interval: %s...", interval) + + self.clean_interval(options, interval) + + # clean snapshots? + if options.clean_snapshots : + log.info("Cleaning snapshots...") + + self.clean_snapshots(options) + + # snapshot from source? + if self.source : + # timestamp for run + now = datetime.datetime.now() + + log.info("Started snapshot run at: %s", now) + + # snapshot + current + snapshot_name = self.run_snapshot(options, now) + + # intervals? + self.run_intervals(options, now, snapshot_name) + + # ok + return 1 + + def __str__ (self) : + return self.name + +def run (options) : + # default config + config = dict( + rsync_options = {}, + intervals = {}, + targets = {}, + ) + + if options.config : + # load + config = parse_config(options.config, config) + + # manual? + if options.destination : + config['targets'][''] = dict( + path = options.destination, + source = options.source, + intervals = options.target_intervals, + ) + + # intervals + for name in config['intervals'] : + interval_config = config['intervals'][name] + + # parse + interval = Interval.from_config(options, name, **interval_config) + + log.info("Interval: %s", name) + + # store + options.intervals[name] = interval + + for option in config['rsync_options'] : + value = config['rsync_options'][option] + + # parse + value = config_bool(option, value) + + log.debug("rsync option: %s=%s", option, value) + + # store + options.rsync_options[option] = value + + for name in config['targets'] : + target_config = config['targets'][name] + + # parse + target = Target.from_config(options, name, **target_config) + + log.info("Target: %s", name) + + # run + target.run(options) + +def config_defaults () : + return dict( + # snapshots/ naming + snapshot_format = '%Y%m%d-%H%M%S', + + # rsync options, in invoke.optargs format + rsync_options = { + 'archive': True, + 'hard-links': True, + 'one-file-system': True, + 'numeric-ids': True, + 'delete': True, + }, + + # defined intervals + intervals = dict((i.name, i) for i in [ + Interval('recent', + format = None, + keep = 4, + ), + + Interval('day', + format = '%Y-%m-%d', + keep = 7, + ), + + Interval('week', + format = '%Y-%W', + keep = 4, + ), + + Interval('month', + format = '%Y-%m', + keep = 4, + ), + + Interval('year', + format = '%Y', + keep = 1, + ) + ]), + ) def main (argv) : global options + # option defaults + defaults = config_defaults() + # global options + args - options, args = parse_options(argv) + options, args = parse_options(argv, defaults) # XXX: args? if args :