scripts/pvlbackup-rsync-snapshot
changeset 12 fbfdde7326f4
child 14 2a7b87dc6c45
equal deleted inserted replaced
11:f8dd32bf0e89 12:fbfdde7326f4
       
     1 #!/usr/bin/python
       
     2 
       
     3 """
       
     4     Manage rsync --link-dest based snapshots.
       
     5 
       
     6     rsync's from <src> to <dst>/snapshots/YYYYMMDD-HHMMSS using --link-dest <dst>/current.
       
     7 
       
     8     Updates symlink <dst>/current -> <dst>/snapshots/...
       
     9 
       
    10     Then links the new snapshot in as <dst>/<interval>/<date> (a symlink to ../snapshots/...) for each --interval given.
       
    11 """
       
    12 
       
    13 from pvl.backup import rsync
       
    14 
       
    15 import optparse
       
    16 import os, os.path, stat
       
    17 import shutil
       
    18 import datetime
       
    19 import logging
       
    20 
       
# root logger; its format and level are configured by logging.basicConfig() in parse_options()
log = logging.getLogger()

# command-line options; None until main() stores the parsed options here
options = None
       
    25 
       
    26 def parse_options (argv) :
       
    27     """
       
    28         Parse command-line arguments.
       
    29     """
       
    30 
       
    31     parser = optparse.OptionParser(
       
    32             prog        = argv[0],
       
    33             usage       = '%prog: [options] --source <src> --destination <dst>',
       
    34 
       
    35             # module docstring
       
    36             # XXX: breaks multi-line descriptions..
       
    37             description = __doc__,
       
    38     )
       
    39 
       
    40     # logging
       
    41     general = optparse.OptionGroup(parser, "General Options")
       
    42 
       
    43     general.add_option('-q', '--quiet',      dest='loglevel', action='store_const', const=logging.WARNING, help="Less output")
       
    44     general.add_option('-v', '--verbose',    dest='loglevel', action='store_const', const=logging.INFO,  help="More output")
       
    45     general.add_option('-D', '--debug',      dest='loglevel', action='store_const', const=logging.DEBUG, help="Even more output")
       
    46 
       
    47     parser.add_option_group(general)
       
    48 
       
    49     #
       
    50     parser.add_option('-s', '--source',     metavar='RSYNC-PATH',
       
    51         help="Backup source in rsync-syntax")
       
    52 
       
    53     parser.add_option('-d', '--destination',    metavar='RSYNC-PATH',
       
    54         help="Backup destination in rsync-syntax")
       
    55 
       
    56     parser.add_option('--interval',         metavar='NAME', action='append', dest='intervals',
       
    57         help="Enable given interval")
       
    58 
       
    59     parser.add_option('--clean-intervals',  action='store_true',
       
    60         help="Clean out old interval links")
       
    61 
       
    62     parser.add_option('--clean-snapshots',  action='store_true',
       
    63         help="Clean out unused snapshots (those not linked to)")
       
    64 
       
    65     parser.add_option('--clean',             action='store_true',
       
    66         help="Clean out both intervals and snapshots")
       
    67 
       
    68     parser.add_option('-n', '--dry-run',    action='store_true',
       
    69         help="Don't actually clean anything")
       
    70 
       
    71     # defaults
       
    72     parser.set_defaults(
       
    73         loglevel    = logging.WARNING,
       
    74         
       
    75         snapshot_format = '%Y%m%d-%H%M%S',
       
    76 
       
    77         ## XXX: configure somehow
       
    78         # rsync options, in invoke.optargs format
       
    79         rsync_options = {
       
    80             'archive':          True,
       
    81             'hard-links':       True,
       
    82             'one-file-system':  True,
       
    83             'numeric-ids':      True,
       
    84             'delete':           True,
       
    85         },
       
    86 
       
    87         # datetime formats for intervals
       
    88         interval_format = {
       
    89             'all':      None,       # default to snapshot_format
       
    90             'day':      '%Y-%m-%d',
       
    91             'week':     '%Y-%W',
       
    92             'month':    '%Y-%m',
       
    93             'year':     '%Y',
       
    94         },
       
    95 
       
    96         # retention for intervals
       
    97         interval_retention = {
       
    98             'all':      4,
       
    99             'day':      7,
       
   100             'week':     4,
       
   101             'month':    4,
       
   102             'year':     1,
       
   103         },
       
   104 
       
   105         # selected intervals
       
   106         intervals       = [],
       
   107     )
       
   108 
       
   109     # parse
       
   110     options, args = parser.parse_args(argv[1:])
       
   111 
       
   112     # validate
       
   113     if not options.destination :
       
   114         parser.error("--destination is required")
       
   115 
       
   116     # configure
       
   117     logging.basicConfig(
       
   118         format  = '%(processName)s: %(name)s: %(levelname)s %(funcName)s : %(message)s',
       
   119         level   = options.loglevel,
       
   120     )
       
   121 
       
   122     if options.clean :
       
   123         options.clean_intervals = options.clean_snapshots = options.clean
       
   124 
       
   125     return options, args
       
   126 
       
   127 def run_snapshot (options) :
       
   128     """
       
   129         Perform the rsync from source to given path.
       
   130     """
       
   131 
       
   132     snapshot_dir = os.path.join(options.destination, 'snapshots')
       
   133 
       
   134     if not os.path.exists(snapshot_dir) :
       
   135         log.warn("Creating snapshots dir: %s", snapshot_dir)
       
   136         os.mkdir(snapshot_dir)
       
   137     
       
   138     # new snapshot
       
   139     snapshot_name = options.now.strftime(options.snapshot_format)
       
   140     snapshot_path = os.path.join(snapshot_dir, snapshot_name)
       
   141     temp_path = os.path.join(snapshot_dir, 'new')
       
   142 
       
   143     if os.path.exists(temp_path) :
       
   144         raise Exception("Old temp snapshot dir remains, please clean up: {path}".format(path=temp_path))
       
   145 
       
   146     log.info("Perform main snapshot: %s", snapshot_path)
       
   147 
       
   148     # build rsync options
       
   149     opts = dict(options.rsync_options)
       
   150 
       
   151     if os.path.exists(options.current_path) :
       
   152         # use as link-dest base; hardlinks unchanged files
       
   153         opts['link-dest'] = options.current_path
       
   154 
       
   155     # go
       
   156     log.debug("rsync %s -> %s", options.source, temp_path)
       
   157     rsync.rsync(options.source, temp_path, **opts)
       
   158 
       
   159     # move in to final name
       
   160     log.debug("rename %s -> %s", temp_path, snapshot_path)
       
   161     os.rename(temp_path, snapshot_path)
       
   162 
       
   163     return snapshot_name
       
   164 
       
   165 def update_interval (options, snapshot_name, interval) :
       
   166     """
       
   167         Update the interval/... links
       
   168     """
       
   169 
       
   170     dir_path = os.path.join(options.destination, interval)
       
   171 
       
   172     if not os.path.exists(dir_path) :
       
   173         log.warn("Creating interval dir: %s", dir_path)
       
   174         os.mkdir(dir_path)
       
   175     
       
   176     # format code
       
   177     name_fmt = options.interval_format[interval]
       
   178 
       
   179     if name_fmt is None :
       
   180         # keep all snapshots
       
   181         name_fmt = options.snapshot_format
       
   182 
       
   183     # name
       
   184     name = options.now.strftime(name_fmt)
       
   185 
       
   186     # path
       
   187     path_name = os.path.join(interval, name)
       
   188     path = os.path.join(options.destination, path_name)
       
   189 
       
   190     log.debug("processing %s", path_name)
       
   191 
       
   192     # already there?
       
   193     if os.path.exists(path) :
       
   194         target = os.readlink(path)
       
   195 
       
   196         log.info("Found existing %s: %s -> %s", interval, name, target)
       
   197 
       
   198     else :
       
   199         # update
       
   200         target = os.path.join('..', 'snapshots', snapshot_name)
       
   201 
       
   202         log.info("Updating %s: %s -> %s", interval, name, target)
       
   203         log.debug("%s -> %s", path, target)
       
   204 
       
   205         os.symlink(target, path)
       
   206 
       
   207 
       
   208 def clean_interval (options, interval) :
       
   209     """
       
   210         Clean out old entries from interval dir.
       
   211     """
       
   212 
       
   213     # path
       
   214     dir_path = os.path.join(options.destination, interval)
       
   215 
       
   216     if not os.path.exists(dir_path) :
       
   217         log.warn("%s: Skipping, no interval dir: %s", interval, dir_path)
       
   218         return
       
   219 
       
   220     # configured
       
   221     retention = options.interval_retention[interval]
       
   222 
       
   223     # clean?
       
   224     items = os.listdir(dir_path)
       
   225     items.sort()
       
   226 
       
   227     log.info("%s: Have %d / %d items", interval, len(items), retention)
       
   228     log.debug("%s: items: %s", interval, ' '.join(items))
       
   229 
       
   230     if len(items) > retention :
       
   231         # clean out
       
   232         clean = items[retention:]
       
   233 
       
   234         log.info("%s: Cleaning out %d items", interval, len(clean))
       
   235         log.debug("%s: cleaning out: %s", interval, ' '.join(clean))
       
   236 
       
   237         for item in clean :
       
   238             path = os.path.join(dir_path, item)
       
   239 
       
   240             log.info("%s: Clean: %s", interval, path)
       
   241 
       
   242             os.unlink(path)
       
   243 
       
   244 def walk_symlinks (tree, ignore=False) :
       
   245     """
       
   246         Walk through all symlinks in given dir, yielding:
       
   247 
       
   248             (dirpath, name, target)
       
   249 
       
   250         Passes through errors from os.listdir/os.lstat.
       
   251     """
       
   252 
       
   253     for name in os.listdir(tree) :
       
   254         if ignore and name in ignore :
       
   255             log.debug("%s: ignore: %s", tree, name)
       
   256             continue
       
   257 
       
   258         path = os.path.join(tree, name)
       
   259         
       
   260         # stat symlink itself
       
   261         st = os.lstat(path)
       
   262 
       
   263         if stat.S_ISDIR(st.st_mode) :
       
   264             # recurse
       
   265             log.debug("%s: tree: %s", tree, name)
       
   266 
       
   267             for item in walk_symlinks(path) :
       
   268                 yield item
       
   269 
       
   270         elif stat.S_ISLNK(st.st_mode) :
       
   271             # found
       
   272             target = os.readlink(path)
       
   273 
       
   274             log.debug("%s: link: %s -> %s", tree, name, target)
       
   275 
       
   276             yield tree, name, target
       
   277 
       
   278         else :
       
   279             log.debug("%s: skip: %s", tree, name)
       
   280 
       
   281 
       
   282 def clean_snapshots (options) :
       
   283     """
       
   284         Clean out all snapshots not linked to from within dest.
       
   285 
       
   286         Fails without doing anything if unable to read the destination dir.
       
   287     """
       
   288 
       
   289     # real path to snapshots
       
   290     snapshots_path = os.path.realpath(os.path.abspath(os.path.join(options.destination, 'snapshots')))
       
   291     log.debug("real snapshots_path: %s", snapshots_path)
       
   292 
       
   293     # set of found targets
       
   294     found = set()
       
   295 
       
   296     # walk all symlinks
       
   297     for dirpath, name, target in walk_symlinks(options.destination, ignore=set(['snapshots'])) :
       
   298         # target dir
       
   299         target_path = os.path.realpath(os.path.join(dirpath, target))
       
   300         target_dir = os.path.dirname(target_path)
       
   301         target_name = os.path.basename(target_path)
       
   302 
       
   303         if target_dir == snapshots_path :
       
   304             log.debug("%s: found: %s -> %s", dirpath, name, target_name)
       
   305             found.add(target_name)
       
   306 
       
   307         else :
       
   308             log.debug("%s: ignore: %s -> %s", dirpath, name, target_path)
       
   309 
       
   310     # discover all snapshots
       
   311     snapshots = set(os.listdir(snapshots_path))
       
   312 
       
   313     # clean out special names
       
   314     snapshots = snapshots - set(['new'])
       
   315 
       
   316     ## compare
       
   317     used = snapshots & found
       
   318     unused = snapshots - found
       
   319     broken = found - snapshots
       
   320 
       
   321     log.info("Found used=%d, unused=%d, broken=%d snapshot symlinks", len(used), len(unused), len(broken))
       
   322     log.debug("used=%s, unused=%s", used, unused)
       
   323 
       
   324     if broken :
       
   325         log.warn("Found broken symlinks to snapshots: %s", ' '.join(broken))
       
   326     
       
   327     if unused :
       
   328         log.info("Clean out unused snapshots: %s", ' '.join(unused))
       
   329 
       
   330         for name in unused :
       
   331             path = os.path.join(snapshots_path, name)
       
   332 
       
   333             log.info("Clean: %s", name)
       
   334 
       
   335             if not options.dry_run :
       
   336                 log.debug("rmtree: %s", path)
       
   337 
       
   338                 # nuke
       
   339                 shutil.rmtree(path)
       
   340 
       
   341             else :
       
   342                 log.debug("dry-run: %s", path)
       
   343 
       
   344 def run (options) :
       
   345     """
       
   346         Perform the current snapshot
       
   347     """
       
   348 
       
   349     # timestamp for run
       
   350     options.now = datetime.datetime.now()
       
   351 
       
   352     # snapshot from source?
       
   353     if options.source :
       
   354         # base snapshot (symlink)
       
   355         options.current_path = os.path.join(options.destination, 'current')
       
   356 
       
   357         log.info("Started snapshot run at: %s", options.now)
       
   358 
       
   359         # initial rsync
       
   360         snapshot_name = run_snapshot(options)
       
   361 
       
   362         # update current
       
   363         log.info("Updating current -> %s", snapshot_name)
       
   364 
       
   365         if os.path.islink(options.current_path) :
       
   366             # replace
       
   367             os.unlink(options.current_path)
       
   368 
       
   369         os.symlink(os.path.join('snapshots', snapshot_name), options.current_path)
       
   370 
       
   371         # intervals?
       
   372         if not options.intervals :
       
   373             log.info("No --intervals given; not running any")
       
   374 
       
   375         else :
       
   376             # maintain intervals
       
   377             log.info("Running intervals: %s", options.intervals)
       
   378 
       
   379             for interval in options.intervals :
       
   380                 log.debug("%s", interval)
       
   381 
       
   382                 # update
       
   383                 update_interval(options, snapshot_name, interval)
       
   384 
       
   385     # clean intervals?
       
   386     if options.clean_intervals:
       
   387         for interval in options.intervals :
       
   388             log.info("Cleaning interval: %s...", interval)
       
   389 
       
   390             clean_interval(options, interval)
       
   391 
       
   392     # clean snapshots?
       
   393     if options.clean_snapshots :
       
   394         log.info("Cleaning snapshots...")
       
   395 
       
   396         clean_snapshots(options)
       
   397 
       
   398     # ok
       
   399     return 1
       
   400 
       
   401 def main (argv) :
       
   402     global options
       
   403 
       
   404     # global options + args
       
   405     options, args = parse_options(argv)
       
   406 
       
   407     # XXX: args?
       
   408     if args :
       
   409         log.error("No arguments are handled")
       
   410         return 2
       
   411 
       
   412     try :
       
   413         # handle it
       
   414         return run(options)
       
   415 
       
   416     except Exception, e:
       
   417         log.error("Internal error:", exc_info=e)
       
   418         return 3
       
   419 
       
   420     # ok
       
   421     return 0
       
   422 
       
   423 
       
   424 
       
# when run as a script: the return value of main() becomes the process exit status
if __name__ == '__main__' :
    import sys

    sys.exit(main(sys.argv))
       
   429