|
1 #!/usr/bin/python |
|
2 |
|
3 """ |
|
4 Manage rsync --link-dest based snapshots. |
|
5 |
|
6 rsync's from <src> to <dst>/snapshots/YYYY-MM-DD-HH-MM-SS using --link-dest <dst>/current. |
|
7 |
|
8 Updates symlink <dst>/current -> <dst>/snapshots/... |
|
9 |
|
10 Then archives <dst>/current to <dst>/<period>/<date> using --link-dest. |
|
11 """ |
|
12 |
|
13 from pvl.backup import rsync |
|
14 |
|
15 import optparse |
|
16 import os, os.path, stat |
|
17 import shutil |
|
18 import datetime |
|
19 import logging |
|
20 |
|
# root logger (getLogger() without a name); used by every function below
log = logging.getLogger()

# command-line options; None until main() stores the parsed options here
options = None
|
25 |
|
def parse_options(argv):
    """
        Parse the command line and set up logging.

        argv is the complete argument vector: argv[0] names the program in
        the usage/help output, argv[1:] is what gets parsed.

        Exits through parser.error() when --destination is missing.

        Returns the optparse (options, args) pair. Besides the command-line
        flags, options carries the built-in defaults: snapshot_format,
        rsync_options, interval_format and interval_retention.
    """

    cli = optparse.OptionParser(
        prog=argv[0],
        usage='%prog: [options] --source <src> --destination <dst>',

        # the module docstring doubles as the --help description
        # XXX: breaks multi-line descriptions..
        description=__doc__,
    )

    # verbosity switches; all three store a level into options.loglevel
    verbosity = optparse.OptionGroup(cli, "General Options")

    for short_opt, long_opt, level, text in (
        ('-q', '--quiet', logging.WARNING, "Less output"),
        ('-v', '--verbose', logging.INFO, "More output"),
        ('-D', '--debug', logging.DEBUG, "Even more output"),
    ):
        verbosity.add_option(short_opt, long_opt, dest='loglevel',
                             action='store_const', const=level, help=text)

    cli.add_option_group(verbosity)

    # what to back up, and where to
    cli.add_option('-s', '--source', metavar='RSYNC-PATH',
                   help="Backup source in rsync-syntax")

    cli.add_option('-d', '--destination', metavar='RSYNC-PATH',
                   help="Backup destination in rsync-syntax")

    # may be repeated; collected into options.intervals
    cli.add_option('--interval', metavar='NAME', action='append', dest='intervals',
                   help="Enable given interval")

    # cleanup switches
    cli.add_option('--clean-intervals', action='store_true',
                   help="Clean out old interval links")

    cli.add_option('--clean-snapshots', action='store_true',
                   help="Clean out unused snapshots (those not linked to)")

    cli.add_option('--clean', action='store_true',
                   help="Clean out both intervals and snapshots")

    cli.add_option('-n', '--dry-run', action='store_true',
                   help="Don't actually clean anything")

    # built-in configuration, not reachable from the command line
    builtin_defaults = dict(
        loglevel=logging.WARNING,

        snapshot_format='%Y%m%d-%H%M%S',

        ## XXX: configure somehow
        # rsync options, in invoke.optargs format
        rsync_options={
            'archive': True,
            'hard-links': True,
            'one-file-system': True,
            'numeric-ids': True,
            'delete': True,
        },

        # datetime formats for intervals
        interval_format={
            'all': None,    # default to snapshot_format
            'day': '%Y-%m-%d',
            'week': '%Y-%W',
            'month': '%Y-%m',
            'year': '%Y',
        },

        # retention for intervals
        interval_retention={
            'all': 4,
            'day': 7,
            'week': 4,
            'month': 4,
            'year': 1,
        },

        # selected intervals
        intervals=[],
    )
    cli.set_defaults(**builtin_defaults)

    parsed, leftover = cli.parse_args(argv[1:])

    # the destination is the one thing we cannot do without
    if not parsed.destination:
        cli.error("--destination is required")

    logging.basicConfig(
        format='%(processName)s: %(name)s: %(levelname)s %(funcName)s : %(message)s',
        level=parsed.loglevel,
    )

    # --clean is shorthand for both of the specific cleanup switches
    if parsed.clean:
        parsed.clean_intervals = parsed.clean_snapshots = parsed.clean

    return parsed, leftover
|
126 |
|
def run_snapshot(options):
    """
        Rsync options.source into a new snapshot below <destination>/snapshots.

        The transfer goes into the temporary directory snapshots/new, which is
        renamed to its final name (options.now formatted with
        options.snapshot_format) only after rsync.rsync() has returned.

        If options.current_path exists, it is handed to rsync as link-dest.

        Reads options.destination, .source, .now, .snapshot_format,
        .rsync_options and .current_path.

        Returns the name of the new snapshot (not its path).

        Raises Exception if snapshots/new is left over from an earlier run.
        Errors from os.mkdir, os.rename and rsync.rsync pass through.
    """

    snapshot_dir = os.path.join(options.destination, 'snapshots')

    if not os.path.exists(snapshot_dir):
        log.warning("Creating snapshots dir: %s", snapshot_dir)
        os.mkdir(snapshot_dir)

    # new snapshot
    snapshot_name = options.now.strftime(options.snapshot_format)
    snapshot_path = os.path.join(snapshot_dir, snapshot_name)
    temp_path = os.path.join(snapshot_dir, 'new')

    # never rsync on top of a half-finished transfer
    if os.path.exists(temp_path):
        raise Exception("Old temp snapshot dir remains, please clean up: {path}".format(path=temp_path))

    log.info("Perform main snapshot: %s", snapshot_path)

    # build rsync options; copy, so the shared defaults stay untouched
    opts = dict(options.rsync_options)

    if os.path.exists(options.current_path):
        # use as link-dest base; hardlinks unchanged files
        #
        # rsync resolves a relative --link-dest against the destination
        # directory rather than the working directory, so a relative
        # --destination would point it at a path that does not exist.
        # NOTE(review): assumes rsync.rsync() passes this value through to
        # rsync unchanged - confirm against pvl.backup.rsync.
        opts['link-dest'] = os.path.abspath(options.current_path)

    # go
    log.debug("rsync %s -> %s", options.source, temp_path)
    rsync.rsync(options.source, temp_path, **opts)

    # move in to final name
    log.debug("rename %s -> %s", temp_path, snapshot_path)
    os.rename(temp_path, snapshot_path)

    return snapshot_name
|
164 |
|
def update_interval(options, snapshot_name, interval):
    """
        Make sure <destination>/<interval>/<name> exists as a snapshot link.

        name is options.now formatted with the interval's entry in
        options.interval_format, or with options.snapshot_format when that
        entry is None.

        An existing link is left alone, even when it is dangling. Otherwise a
        relative symlink to ../snapshots/<snapshot_name> is created. The
        interval directory is created on demand.

        Raises KeyError for an interval without a configured format, before
        anything is created on disk. OSError from os.mkdir, os.readlink and
        os.symlink passes through (e.g. when the name is taken by something
        that is not a symlink).
    """

    # look the format up first: an unknown interval name must fail before
    # a directory gets created for it
    name_fmt = options.interval_format[interval]

    if name_fmt is None:
        # keep all snapshots
        name_fmt = options.snapshot_format

    dir_path = os.path.join(options.destination, interval)

    if not os.path.exists(dir_path):
        log.warning("Creating interval dir: %s", dir_path)
        os.mkdir(dir_path)

    # name
    name = options.now.strftime(name_fmt)

    # path
    path_name = os.path.join(interval, name)
    path = os.path.join(options.destination, path_name)

    log.debug("processing %s", path_name)

    # already there?
    # lexists() rather than exists(): a dangling link still occupies the
    # name, and os.symlink() on top of it would fail
    if os.path.lexists(path):
        target = os.readlink(path)

        log.info("Found existing %s: %s -> %s", interval, name, target)

        if not os.path.exists(path):
            log.warning("Existing %s link is broken: %s -> %s", interval, name, target)

    else:
        # update; relative target, so the tree can be moved as a whole
        target = os.path.join('..', 'snapshots', snapshot_name)

        log.info("Updating %s: %s -> %s", interval, name, target)
        log.debug("%s -> %s", path, target)

        os.symlink(target, path)
|
206 |
|
207 |
|
def clean_interval(options, interval):
    """
        Remove the oldest entries from <destination>/<interval>, keeping the
        newest options.interval_retention[interval] of them.

        Entries are ordered by name; the names are strftime() output with the
        most significant field first, so name order is age order.

        With options.dry_run set, the entries that would go are only logged.

        Does nothing (beyond a warning) if the interval dir does not exist.
        Errors from os.listdir/os.unlink pass through.
    """

    # path
    dir_path = os.path.join(options.destination, interval)

    if not os.path.exists(dir_path):
        log.warning("%s: Skipping, no interval dir: %s", interval, dir_path)
        return

    # configured
    retention = options.interval_retention[interval]

    # oldest first
    items = sorted(os.listdir(dir_path))

    log.info("%s: Have %d / %d items", interval, len(items), retention)
    log.debug("%s: items: %s", interval, ' '.join(items))

    excess = len(items) - retention

    if excess <= 0:
        # within retention
        return

    # the surplus comes off the front of the list, i.e. the oldest entries;
    # slicing by count also stays correct for a retention of zero
    clean = items[:excess]

    log.info("%s: Cleaning out %d items", interval, len(clean))
    log.debug("%s: cleaning out: %s", interval, ' '.join(clean))

    for item in clean:
        path = os.path.join(dir_path, item)

        log.info("%s: Clean: %s", interval, path)

        if options.dry_run:
            log.debug("dry-run: %s", path)

        else:
            os.unlink(path)
|
243 |
|
def walk_symlinks(tree, ignore=False):
    """
        Recursively yield every symlink found below tree, as:

            (dirpath, name, target)

        dirpath is the directory holding the link, name the link's entry
        name and target the raw os.readlink() value.

        Entries directly inside tree whose name is in ignore are skipped;
        ignore is not handed down to subdirectories. Symlinks are reported,
        never followed.

        Passes through errors from os.listdir/os.lstat.
    """

    for entry in os.listdir(tree):
        if ignore and entry in ignore:
            log.debug("%s: ignore: %s", tree, entry)
            continue

        entry_path = os.path.join(tree, entry)

        # lstat: we want the mode of the entry itself, not of a link target
        mode = os.lstat(entry_path).st_mode

        if stat.S_ISLNK(mode):
            # found
            link_target = os.readlink(entry_path)

            log.debug("%s: link: %s -> %s", tree, entry, link_target)

            yield tree, entry, link_target
            continue

        if not stat.S_ISDIR(mode):
            # plain file, device, ...
            log.debug("%s: skip: %s", tree, entry)
            continue

        # real directory: descend
        log.debug("%s: tree: %s", tree, entry)

        for found in walk_symlinks(entry_path):
            yield found
|
280 |
|
281 |
|
def clean_snapshots(options):
    """
        Clean out all snapshots not linked to from within dest.

        Every symlink below options.destination (the top-level snapshots dir
        excepted) is resolved; a snapshot counts as used when some link
        resolves to an entry directly inside <destination>/snapshots.
        The temporary snapshots/new is never touched.

        With options.dry_run set, unused snapshots are only logged.

        Fails without doing anything if unable to read the destination dir:
        the whole tree is scanned before the first snapshot is removed.
        Errors from shutil.rmtree pass through.
    """

    # real path to snapshots
    snapshots_path = os.path.realpath(os.path.abspath(os.path.join(options.destination, 'snapshots')))
    log.debug("real snapshots_path: %s", snapshots_path)

    # names of the snapshots that some symlink resolves to
    found = set()

    # walk all symlinks
    for dirpath, name, target in walk_symlinks(options.destination, ignore=set(['snapshots'])):
        # resolve relative to the dir holding the link
        target_path = os.path.realpath(os.path.join(dirpath, target))
        target_dir, target_name = os.path.split(target_path)

        if target_dir == snapshots_path:
            log.debug("%s: found: %s -> %s", dirpath, name, target_name)
            found.add(target_name)

        else:
            log.debug("%s: ignore: %s -> %s", dirpath, name, target_path)

    # discover all snapshots, minus special names
    snapshots = set(os.listdir(snapshots_path)) - set(['new'])

    ## compare
    # sorted, so that log output and removal order are reproducible
    used = sorted(snapshots & found)
    unused = sorted(snapshots - found)
    broken = sorted(found - snapshots)

    log.info("Found used=%d, unused=%d, broken=%d snapshot symlinks", len(used), len(unused), len(broken))
    log.debug("used=%s, unused=%s", used, unused)

    if broken:
        log.warning("Found broken symlinks to snapshots: %s", ' '.join(broken))

    if unused:
        log.info("Clean out unused snapshots: %s", ' '.join(unused))

        for name in unused:
            path = os.path.join(snapshots_path, name)

            log.info("Clean: %s", name)

            if not options.dry_run:
                log.debug("rmtree: %s", path)

                # nuke
                shutil.rmtree(path)

            else:
                log.debug("dry-run: %s", path)
|
343 |
|
def run(options):
    """
        Perform one complete run, as selected by options.

        With options.source set: take a new snapshot, repoint the
        <destination>/current symlink at it and update the link of every
        interval in options.intervals.

        Then, independently of the above: clean the selected intervals if
        options.clean_intervals is set, and clean unused snapshots if
        options.clean_snapshots is set.

        Sets options.now (and options.current_path when snapshotting) as a
        side effect.

        Returns 1 once everything has completed; errors from the individual
        steps pass through as exceptions.
    """

    # timestamp for run; shared by the snapshot and all interval names
    options.now = datetime.datetime.now()

    # snapshot from source?
    if options.source:
        # base snapshot (symlink)
        options.current_path = os.path.join(options.destination, 'current')

        log.info("Started snapshot run at: %s", options.now)

        # initial rsync
        snapshot_name = run_snapshot(options)

        # update current
        log.info("Updating current -> %s", snapshot_name)

        if os.path.islink(options.current_path):
            # replace
            os.unlink(options.current_path)

        os.symlink(os.path.join('snapshots', snapshot_name), options.current_path)

        # intervals?
        # Interval links point at the snapshot just taken, so they are only
        # maintained on this branch; without --source there is no
        # snapshot_name to link to.
        if not options.intervals:
            log.info("No --intervals given; not running any")

        else:
            # maintain intervals
            log.info("Running intervals: %s", options.intervals)

            for interval in options.intervals:
                log.debug("%s", interval)

                # update
                update_interval(options, snapshot_name, interval)

    # clean intervals?
    if options.clean_intervals:
        for interval in options.intervals:
            log.info("Cleaning interval: %s...", interval)

            clean_interval(options, interval)

    # clean snapshots?
    if options.clean_snapshots:
        log.info("Cleaning snapshots...")

        clean_snapshots(options)

    # ok; a truthy "done" marker, not a process exit status
    return 1
|
400 |
|
def main(argv):
    """
        Command-line entry point.

        Parses argv, stores the result in the module-level options and
        performs the run.

        Returns the process exit status:

            0   the run completed
            2   positional arguments were given (none are handled)
            3   the run raised an exception (logged with its traceback)

        parse_options() may also exit directly on invalid options.
    """

    global options

    # global options + args
    options, args = parse_options(argv)

    # XXX: args?
    if args:
        log.error("No arguments are handled")
        return 2

    try:
        # handle it; run() reports failure by raising, and its return value
        # is a success marker that must not be used as the exit status
        run(options)

    except Exception:
        # log.exception() logs at ERROR level and appends the traceback
        log.exception("Internal error:")
        return 3

    # ok
    return 0
|
422 |
|
423 |
|
424 |
|
if __name__ == '__main__':
    import sys

    # main()'s return value becomes the exit status of the process
    exit_status = main(sys.argv)
    sys.exit(exit_status)
|
429 |