#!/usr/bin/python

"""
    Manage rsync --link-dest based snapshots.

    rsync's from to /snapshots/YYYY-MM-DD-HH-MM-SS using --link-dest /current.

    Updates symlink /current -> /snapshots/...

    Then archives /current to // using --link-dest.
"""
# NOTE(review): the module docstring above (shown as the --help description) and the
# usage string in parse_options() look like they have lost their <placeholder> names;
# the text is kept as found - confirm against the upstream file before rewording.

from pvl.backup import rsync

import optparse
try :
    import ConfigParser
except ImportError :
    # Python 3 renamed the module
    import configparser as ConfigParser
import os, os.path, stat
import shutil
import datetime
import logging

log = logging.getLogger()

# command-line options
options = None

def parse_options (argv, defaults) :
    """
        Parse command-line arguments.

            argv        - full argument vector; argv[0] is used as the program name
            defaults    - dict of option defaults (see config_defaults()); must provide rsync_options

        Also configures logging from the chosen loglevel, and stores --include-from/--exclude-from
        into options.rsync_options.

        Returns (options, args), where args are the remaining positional arguments.
    """

    parser = optparse.OptionParser(
        prog        = argv[0],
        usage       = '%prog: [options] [ --config | --target [ --source ] [ --interval ] ]',

        # module docstring
        # XXX: breaks multi-line descriptions..
        description = __doc__,
    )

    # logging
    general = optparse.OptionGroup(parser, "General Options")

    general.add_option('-q', '--quiet',     dest='loglevel', action='store_const', const=logging.WARNING, help="Less output")
    general.add_option('-v', '--verbose',   dest='loglevel', action='store_const', const=logging.INFO,  help="More output")
    general.add_option('-D', '--debug',     dest='loglevel', action='store_const', const=logging.DEBUG, help="Even more output")

    parser.add_option_group(general)

    # rsync; named rsync_group so as not to shadow the imported rsync module
    rsync_group = optparse.OptionGroup(parser, "rsync Options")

    rsync_group.add_option('--exclude-from',    metavar='FILE',
        help="Read exclude rules from given file")

    rsync_group.add_option('--include-from',    metavar='FILE',
        help="Read include rules from given file")

    parser.add_option_group(rsync_group)

    # global
    parser.add_option('--clean-intervals',  action='store_true',
        help="Clean out old interval links")

    parser.add_option('--clean-snapshots',  action='store_true',
        help="Clean out unused snapshots (those not linked to)")

    parser.add_option('--clean',            action='store_true',
        help="Clean out both intervals and snapshots")

    parser.add_option('-n', '--dry-run',    action='store_true',
        help="Don't actually clean anything")

    # config file
    parser.add_option('-c', '--config',     metavar='FILE',
        help="Load configuration file")

    # manual target
    parser.add_option('-T', '--target',     metavar='PATH',
        help="Target path")

    parser.add_option('-s', '--source',     metavar='RSYNC-PATH', dest='target_source', default=False,
        help="Run target backup from source in rsync-syntax")

    parser.add_option('--interval',         metavar='NAME', action='append', dest='target_intervals',
        help="Run target with given interval(s)")

    # defaults
    parser.set_defaults(
        loglevel            = logging.INFO,

        target_intervals    = [],
    )
    parser.set_defaults(**defaults)

    # parse
    options, args = parser.parse_args(argv[1:])

    # configure
    logging.basicConfig(
        format  = '%(processName)s: %(name)s: %(levelname)s %(funcName)s : %(message)s',
        level   = options.loglevel,
    )

    if options.clean :
        # --clean implies both
        options.clean_intervals = options.clean_snapshots = options.clean

    if options.include_from :
        options.rsync_options['include-from'] = options.include_from

    if options.exclude_from :
        options.rsync_options['exclude-from'] = options.exclude_from

    return options, args

## Configuration
class ConfigError (Exception) :
    """
        Invalid or unreadable configuration.
    """

    pass

def process_config_name (name) :
    """
        Process config file name into python version, i.e. replace '-' with '_'.
    """

    return name.replace('-', '_')

def parse_config (path, defaults) :
    """
        Parse given config file.

        Each [section] becomes a dict; a section name containing ':' is split into nested dicts,
        i.e. [a:b] is stored as config['a']['b']. Section and option names are mangled using
        process_config_name(). Values are stored as the strings read from the file.

        Returns a new top-level dict based on defaults; nested dicts within defaults are
        updated in-place.

        Raises ConfigError if the file cannot be read.
    """

    log.debug("loading config: %s", path)

    config = dict(defaults)
    config_file = ConfigParser.RawConfigParser()

    # read() skips unreadable files and returns the list of files it did manage to read
    if not config_file.read([path]) :
        raise ConfigError("Unable to read config file: {path}".format(path=path))

    # handle each section
    for section in config_file.sections() :
        # mangle
        section_name = process_config_name(section)

        log.debug("section: %s", section_name)

        # subsections
        section_path = section_name.split(':')

        # lookup section dict from config
        lookup = config

        # XXX: sections are not in order, so we can't rely on the parent section being created before we handle the sub-section
        for name in section_path :
            # possibly create
            lookup = lookup.setdefault(name, {})

        # found dict for this section
        config_section = lookup

        # values
        for name, value in config_file.items(section) :
            # mangle
            name = process_config_name(name)

            log.debug("section: %s: %s = %s", '/'.join(section_path), name, value)

            config_section[name] = value

    log.debug("config: %s", config)

    return config

def config_bool (name, value, strict=True) :
    """
        Parse a boolean config value (yes/true/1/on, no/false/0/off; case-insensitive).

        Values that are already bools are returned as-is.

        With strict, raises ConfigError for any other value; otherwise the value is returned unchanged.
    """

    if isinstance(value, bool) :
        return value

    if value.lower() in ('yes', 'true', '1', 'on') :
        return True

    elif value.lower() in ('no', 'false', '0', 'off') :
        return False

    elif strict :
        raise ConfigError("Unrecognized boolean value: {name} = {value}".format(name=name, value=value))

    else :
        # allow non-boolean values
        return value

def config_int (name, value) :
    """
        Parse an integer config value.

        Raises ConfigError if the value is not a valid integer.
    """

    try :
        return int(value)

    except (TypeError, ValueError) :
        # TypeError covers a missing (None) value
        raise ConfigError("Invalid integer value: {name} = {value}".format(name=name, value=value))

def config_list (name, value) :
    """
        Parse a whitespace-separated list config value.
    """

    return value.split()

def walk_symlinks (tree, ignore=False) :
    """
        Walk through all symlinks in given dir, yielding:

            (dirpath, name, target)

        Names in the ignore collection are skipped within the top-level dir only; it is not
        passed down into subdirectories.

        Passes through errors from os.listdir/os.lstat.
    """

    for name in os.listdir(tree) :
        if ignore and name in ignore :
            log.debug("%s: ignore: %s", tree, name)
            continue

        path = os.path.join(tree, name)

        # stat symlink itself
        st = os.lstat(path)

        if stat.S_ISDIR(st.st_mode) :
            # recurse
            log.debug("%s: tree: %s", tree, name)

            for item in walk_symlinks(path) :
                yield item

        elif stat.S_ISLNK(st.st_mode) :
            # found
            target = os.readlink(path)

            log.debug("%s: link: %s -> %s", tree, name, target)

            yield tree, name, target

        else :
            log.debug("%s: skip: %s", tree, name)


class Interval (object) :
    """
        An interval definition.

            name        - name of the interval, used as the dir name within the target
            format      - strftime format for the link name, or None to use the snapshot name
            keep        - number of links to keep when cleaning
    """

    @classmethod
    def from_config (cls, options, name,
        format,
        keep,
    ) :
        """
            Build from a global interval config section.

            Raises ConfigError if keep is not a valid integer.
        """

        return cls(name,
            # magic to use snapshot name: empty format -> None
            format  = format or None,
            keep    = config_int('keep', keep),
        )

    @classmethod
    def from_target_config (cls, name, base, arg) :
        """
            Build from a per-target interval setting.

                base    - globally defined Interval of the same name, or None
                arg     - dict of overrides (format, keep), a bare keep value, or empty to use base as-is

            Raises ConfigError for an invalid keep value, or if arg is not a dict and there is
            no base interval to inherit from.
        """

        if isinstance(arg, dict) :
            # full instance; config values are strings, so keep needs parsing
            if 'keep' in arg :
                keep = config_int('keep', arg['keep'])
            else :
                keep = base.keep if base else None

            return cls(name,
                format  = arg.get('format', base.format if base else None) or None,
                keep    = keep,
            )

        if base is None :
            raise ConfigError("Unknown interval: {name}".format(name=name))

        # partial instance with keep
        return cls(name,
            format  = base.format,
            keep    = config_int('keep', arg) if arg else base.keep,
        )

    def __init__ (self, name, format, keep) :
        self.name = name
        self.format = format
        self.keep = keep

    def __str__ (self) :
        return self.name

class Target (object) :
    """
        A target run, i.e. a rsync-snapshot destination dir

        [target:...]
    """

    # name of the in-progress snapshot dir within snapshots/
    SNAPSHOT_TEMP_NAME = 'tmp'

    @classmethod
    def from_config (cls, options, name,
        path            = False,
        source          = None,
        enable          = 'no',
        exclude_from    = None,

        # subsections
        intervals       = None,
        rsync_options   = None,
    ) :
        """
            Build from a target config section.

                path            - target dir; defaults to the target name
                source          - rsync source; False to not run any snapshot
                intervals       - dict of {interval name: per-target setting}, see Interval.from_target_config
                rsync_options   - dict of per-target overrides on top of options.rsync_options

            Raises ConfigError on a missing source, or invalid values.
        """

        if not source and source is not False :
            raise ConfigError("Missing required option: source for [target:{name}]".format(name=name))

        # global defaults
        _rsync_options = dict(options.rsync_options)

        if rsync_options :
            # override, parsing booleans but allowing other values through
            _rsync_options.update(
                (option, config_bool(option, value, strict=False)) for option, value in rsync_options.items()
            )

        # lookup intervals, with base from options.intervals
        # the loop variable must not be called name: python2 list comprehensions leak their
        # variables, which would replace the target name used below
        _intervals = [
            Interval.from_target_config(interval_name, options.intervals.get(interval_name), arg)
            for interval_name, arg in (intervals or {}).items()
        ]

        # NOTE(review): exclude_from is stored on the Target, but nothing visible here passes
        # it on to rsync (only the global --exclude-from ends up in rsync_options) - confirm intent
        return cls(name,
            path            = path if path else name,
            source          = source,
            enable          = config_bool('enable', enable),
            intervals       = _intervals,
            rsync_options   = _rsync_options,
            exclude_from    = exclude_from,
        )

    def __init__ (self, name,
        path,
        source,
        enable          = False,
        intervals       = None,
        rsync_options   = None,
        exclude_from    = None
    ) :
        self.name = name

        self.path = path
        self.source = source
        # NOTE(review): enable is stored, but not consulted by anything visible in this file
        self.enable = enable

        # fresh containers per instance, not shared mutable defaults
        self.intervals = [] if intervals is None else intervals

        self.rsync_options = {} if rsync_options is None else rsync_options
        self.exclude_from = exclude_from

        # snapshots dir
        self.snapshots_dir = os.path.join(self.path, 'snapshots')

        # 'current' symlink
        self.current_path = os.path.join(self.path, 'current')

    def prepare (self, options) :
        """
            Prepare dir for usage: the target dir must exist, the snapshots dir is created if missing.
        """

        if not os.path.exists(self.path) :
            raise Exception("Missing target dir: {path}".format(path=self.path))

        if not os.path.exists(self.snapshots_dir) :
            log.warning("Creating snapshots dir: %s", self.snapshots_dir)
            os.mkdir(self.snapshots_dir)

    def snapshot (self, options, now) :
        """
            Perform the rsync from our source to a new dir within self.snapshots_dir.

            The rsync is done into a temp dir, which is renamed to its final name afterwards.

            Returns the name of the new snapshot.

            XXX: allocate snapshot_name here?
        """

        # new snapshot
        snapshot_name = now.strftime(options.snapshot_format)
        snapshot_path = os.path.join(self.snapshots_dir, snapshot_name)
        temp_path = os.path.join(self.snapshots_dir, self.SNAPSHOT_TEMP_NAME)

        if os.path.exists(temp_path) :
            raise Exception("Old temp snapshot dir remains, please clean up: {path}".format(path=temp_path))

        log.info("Perform main snapshot: %s -> %s", self.source, snapshot_path)

        # build rsync options
        opts = dict(self.rsync_options)

        if os.path.exists(self.current_path) :
            # real path to target
            target = os.readlink(self.current_path)
            target_path = os.path.join(os.path.dirname(self.current_path), target)
            target_abs = os.path.abspath(target_path)

            log.info("Using current -> %s as base", target_path)

            # use as link-dest base; hardlinks unchanged files; target directory must be empty
            # rsync links absolute paths..
            opts['link-dest'] = target_abs

        # go
        log.debug("rsync %s -> %s", self.source, temp_path)
        rsync.rsync(self.source, temp_path, **opts)

        # move in to final name
        log.debug("rename %s -> %s", temp_path, snapshot_path)
        os.rename(temp_path, snapshot_path)

        return snapshot_name

    def update_interval (self, options, interval, now, snapshot_name) :
        """
            Update given interval's links for this target, using the given new snapshot.

            An existing link of the same name is left as-is.
        """

        dir_path = os.path.join(self.path, interval.name)

        if not os.path.exists(dir_path) :
            log.warning("Creating interval dir: %s", dir_path)
            os.mkdir(dir_path)

        # name
        if interval.format is None :
            # per-snapshot
            name = snapshot_name

            log.debug("%s: using snapshot_name: %s", interval, name)

        else :
            # by date
            name = now.strftime(interval.format)

            log.debug("%s: using interval.format: %s -> %s", interval, interval.format, name)

        # path
        path_name = os.path.join(interval.name, name)
        path = os.path.join(self.path, path_name)

        log.debug("%s: processing %s", interval, path_name)

        # already there?
        # lexists, since a dangling symlink still occupies the name and would fail os.symlink
        if os.path.lexists(path) :
            target = os.readlink(path)

            log.info("%s: Found existing: %s -> %s", interval, name, target)

        else :
            # update
            target = os.path.join('..', 'snapshots', snapshot_name)

            log.info("%s: Updating: %s -> %s", interval, name, target)
            log.debug("%s -> %s", path, target)

            os.symlink(target, path)

    def clean_interval (self, options, interval) :
        """
            Clean out given interval's dir for this target, keeping the interval.keep newest items.

            Items are ordered by name. Nothing is removed with options.dry_run.
        """

        # path
        dir_path = os.path.join(self.path, interval.name)

        if not os.path.exists(dir_path) :
            log.warning("%s: Skipping, no interval dir: %s", interval, dir_path)
            return

        # configured
        keep = interval.keep

        if keep is None :
            # no limit known; do not guess
            log.warning("%s: Skipping, no keep count configured", interval)
            return

        # newest first, so that the items past the first keep are the oldest ones
        items = os.listdir(dir_path)
        items.sort(reverse=True)

        log.info("%s: Have %d / %d items", interval, len(items), keep)
        log.debug("%s: items: %s", interval, ' '.join(items))

        if len(items) > keep :
            # clean out
            clean = items[keep:]

            log.info("%s: Cleaning out %d items", interval, len(clean))
            log.debug("%s: cleaning out: %s", interval, ' '.join(clean))

            for item in clean :
                path = os.path.join(dir_path, item)

                log.info("%s: Clean: %s", interval, path)

                if not options.dry_run :
                    os.unlink(path)

                else :
                    log.debug("dry-run: %s", path)

    def clean_snapshots (self, options) :
        """
            Clean out all snapshots for this target not linked to from within our root.

            Nothing is removed with options.dry_run.

            Fails without doing anything if unable to read the destination dir.
        """

        # real path to snapshots
        snapshots_path = os.path.realpath(os.path.abspath(self.snapshots_dir))
        log.debug("real snapshots_path: %s", snapshots_path)

        # set of found targets
        found = set()

        # walk all symlinks
        for dirpath, name, target in walk_symlinks(self.path, ignore=set(['snapshots'])) :
            # target dir
            target_path = os.path.realpath(os.path.join(dirpath, target))
            target_dir = os.path.dirname(target_path)
            target_name = os.path.basename(target_path)

            if target_dir == snapshots_path :
                log.debug("%s: found: %s -> %s", dirpath, name, target_name)
                found.add(target_name)

            else :
                log.debug("%s: ignore: %s -> %s", dirpath, name, target_path)

        # discover all snapshots
        snapshots = set(os.listdir(snapshots_path))

        # clean out special names; the in-progress dir used by snapshot() is never linked to,
        # and must not be removed as an unused snapshot
        snapshots = snapshots - set(['new', self.SNAPSHOT_TEMP_NAME])

        ## compare
        used = snapshots & found
        unused = snapshots - found
        broken = found - snapshots

        log.info("Found used=%d, unused=%d, broken=%d snapshot symlinks", len(used), len(unused), len(broken))
        log.debug("used=%s, unused=%s", used, unused)

        if broken :
            log.warning("Found broken symlinks to snapshots: %s", ' '.join(broken))

        if unused :
            log.info("Clean out unused snapshots: %s", ' '.join(unused))

            for name in unused :
                path = os.path.join(snapshots_path, name)

                log.info("Clean: %s", name)

                if not options.dry_run :
                    log.debug("rmtree: %s", path)

                    # nuke
                    shutil.rmtree(path)

                else :
                    log.debug("dry-run: %s", path)

    def run_snapshot (self, options, now) :
        """
            Run snapshot + update current.

            Returns the name of the new snapshot.
        """

        # initial rsync
        snapshot_name = self.snapshot(options, now)

        # update current
        log.info("Updating current -> %s", snapshot_name)

        if os.path.islink(self.current_path) :
            # replace
            os.unlink(self.current_path)

        os.symlink(os.path.join('snapshots', snapshot_name), self.current_path)

        return snapshot_name

    def run_intervals (self, options, now, snapshot_name) :
        """
            Run our intervals, linking the given snapshot into each of them.
        """

        if not self.intervals :
            log.info("No intervals given; not running any")
            return

        # maintain intervals
        log.info("Updating %d intervals...", len(self.intervals))

        for interval in self.intervals :
            log.debug("%s", interval)

            log.info("Updating interval: %s", interval)

            # update
            self.update_interval(options, interval, now, snapshot_name)

    def run (self, options) :
        """
            Execute: prepare, clean as requested by options, and then snapshot + update intervals
            if we have a source.

            Returns 1.
        """

        # prep
        self.prepare(options)

        # clean intervals?
        if options.clean_intervals :
            for interval in self.intervals :
                log.info("Cleaning interval: %s...", interval)

                self.clean_interval(options, interval)

        # clean snapshots?
        if options.clean_snapshots :
            log.info("Cleaning snapshots...")

            self.clean_snapshots(options)

        # snapshot from source?
        if self.source :
            # timestamp for run
            now = datetime.datetime.now()

            log.info("Started snapshot run at: %s", now)

            # snapshot + current
            snapshot_name = self.run_snapshot(options, now)

            # intervals?
            self.run_intervals(options, now, snapshot_name)

        # ok
        return 1

    def __str__ (self) :
        return self.name

def run (options, targets) :
    """
        Load config, and run the given targets.

            options     - parsed options, as from parse_options()
            targets     - list of target names to run; empty to run all defined targets

        Returns 0 on success, 2 on configuration errors or unknown targets.
    """

    # default config
    config = dict(
        rsync_options   = {},
        intervals       = {},
        targets         = {},
    )

    if options.config :
        # load
        try :
            config = parse_config(options.config, config)
        except ConfigError as e :
            log.error("Configuration error: %s: %s", options.config, e)
            return 2

    # manual?
    if options.target :
        config['targets'][options.target] = dict(
            path        = options.target,
            source      = options.target_source,
            intervals   = dict((name, None) for name in options.target_intervals),
        )

    # intervals
    for name in config['intervals'] :
        interval_config = config['intervals'][name]

        # parse
        try :
            interval = Interval.from_config(options, name, **interval_config)
        except ConfigError as e :
            log.error("Configuration error: interval %s: %s", name, e)
            return 2

        log.debug("config interval: %s", name)

        # store
        options.intervals[name] = interval

    for option in config['rsync_options'] :
        value = config['rsync_options'][option]

        # parse, allowing non-boolean values as well...
        value = config_bool(option, value, strict=False)

        log.debug("rsync option: %s=%s", option, value)

        # store
        options.rsync_options[option] = value

    # what targets?
    if not targets :
        # default to all defined targets
        targets = list(config['targets'])

    else :
        # given ones, but verify they exist
        for target in targets :
            if target not in config['targets'] :
                log.error("Unknown target given: %s", target)
                log.info("Defined targets: %s", ' '.join(config['targets']))

                return 2

    # targets
    for name in targets :
        target_config = config['targets'][name]

        # parse
        try :
            target = Target.from_config(options, name, **target_config)
        except ConfigError as e :
            log.error("Configuration error: target %s: %s", name, e)
            return 2

        log.info("Config target: %s", name)

        # run
        target.run(options)

    # ok
    return 0

def config_defaults () :
    """
        Build the default option values: snapshot naming, rsync options and the predefined intervals.
    """

    return dict(
        # snapshots/ naming
        snapshot_format = '%Y%m%d-%H%M%S',

        # rsync options, in invoke.optargs format
        rsync_options   = {
            'archive':          True,
            'hard-links':       True,
            'one-file-system':  True,
            'numeric-ids':      True,
            'delete':           True,
        },

        # defined intervals
        intervals       = dict((i.name, i) for i in [
            Interval('recent',
                format  = None,
                keep    = 4,
            ),

            Interval('day',
                format  = '%Y-%m-%d',
                keep    = 7,
            ),

            Interval('week',
                format  = '%Y-%W',
                keep    = 4,
            ),

            Interval('month',
                format  = '%Y-%m',
                keep    = 4,
            ),

            Interval('year',
                format  = '%Y',
                keep    = 1,
            )
        ]),
    )

def main (argv) :
    """
        Command-line entry point.

        Returns the process exit code: the result of run(), or 3 on unexpected errors.
    """

    global options

    # option defaults
    defaults = config_defaults()

    # global options + args
    options, args = parse_options(argv, defaults)

    # args: filter targets
    # XXX: fix name mangling
    targets = [process_config_name(target) for target in args]

    try :
        # handle it
        return run(options, targets)

    except Exception :
        # top-level boundary: log with traceback
        log.exception("Internal error:")
        return 3


if __name__ == '__main__' :
    import sys

    sys.exit(main(sys.argv))