# HG changeset patch # User Tero Marttila # Date 1340090660 -10800 # Node ID 4fdf70cbc9b0c726eed9222e916c25cc49b16f79 # Parent 3402a85f9baac344632859c66063ae6c56109784 pvl.backup.lvm: snapshot: retry removal if fails with exit == 5 ("Can't remove open logical volume ...") diff -r 3402a85f9baa -r 4fdf70cbc9b0 pvl/backup/lvm.py --- a/pvl/backup/lvm.py Thu May 24 14:26:24 2012 +0300 +++ b/pvl/backup/lvm.py Tue Jun 19 10:24:20 2012 +0300 @@ -17,6 +17,9 @@ # number of seconds to wait for lvm snapshot to settle after unmount.. LVM_SNAPSHOT_WAIT = 5 +# number of times to retry removal, due to lvm/udev bug.. +LVM_SNAPSHOT_RETRY = 5 + class LVMError (Exception) : pass @@ -67,7 +70,7 @@ return LVMVolume(self, name) @contextlib.contextmanager - def snapshot (self, base, wait=LVM_SNAPSHOT_WAIT, **opts) : + def snapshot (self, base, wait=LVM_SNAPSHOT_WAIT, retry=LVM_SNAPSHOT_RETRY, **opts) : """ A Context Manager for handling an LVMSnapshot. @@ -76,10 +79,11 @@ with lvm.snapshot(lv) as snapshot : ... wait - wait given interval for the snapshot device to settle before unmounting it + retry - retry removal given number of times **opts - LVMSnapshot.create() options (e.g. size) """ - log.debug("creating snapshot from {base}: wait={wait} {opts}".format(base=base, wait=wait, opts=opts)) + log.debug("creating snapshot from {base}: wait={wait}, retry={retry} {opts}".format(base=base, wait=wait, retry=retry, opts=opts)) snapshot = LVMSnapshot.create(self, base, **opts) try : @@ -87,17 +91,43 @@ yield snapshot finally: - if wait : - # XXX: there's some timing bug with an umount leaving the LV open, do we need to wait for it to get closed after mount? - # https://bugzilla.redhat.com/show_bug.cgi?id=577798 - # some udev event bug, possibly fixed in lvm2 2.02.86? - # try to just patiently wait for it to settle down... if this isn't enough, we need some dmremove magic - log.debug("cleanup: waiting %.2f seconds for snapshot volume to settle...", wait) - time.sleep(wait) + # XXX: there's some common udev bug with removing lvm snapshots + # https://bugzilla.redhat.com/show_bug.cgi?id=577798 + # http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=618016 + # possibly fixed in lvm2 2.02.86? + # try to just patiently wait for it to settle down... then retry... if this isn't enough, we need some dmremove magic? + while True : + # wait.. + if wait : + log.debug("%s: cleanup: waiting %.2f seconds for snapshot volume to settle...", snapshot, wait) + time.sleep(wait) - # cleanup - log.debug("cleanup: {0}".format(snapshot)) - snapshot.close() + # lvremove + try : + log.debug("%s: cleanup", snapshot) + snapshot.close() + + except InvokeError as ex : + if ex.exit != 5 : + # lvremove sez "Can't remove open logical volume ..." -> exit(5); + raise + + # retry counter? + if retry : + log.warn("%s: cleanup: lvremove failed, retrying...", snapshot) + retry -= 1 + + # retry + continue + + else : + # failed + log.error("%s: cleanup: lvremove failed, aborting...", snapshot) + raise + + else : + # done + break def __str__ (self) : return self.name @@ -230,10 +260,7 @@ Remove snapshot volume. """ - # XXX: can't deactivate snapshot volume - #self.lvm.command('lvchange', name, available='n') - - # XXX: risky! + # don't typo me! self.lvm.command('lvremove', '-f', self.lvm_path) def __repr__ (self) :