pvl.backup.lvm: snapshot: retry removal if fails with exit == 5 ("Can't remove open logical volume ...")
authorTero Marttila <terom@paivola.fi>
Tue, 19 Jun 2012 10:24:20 +0300
changeset 61 4fdf70cbc9b0
parent 60 3402a85f9baa
child 62 86ba7b12a7c9
pvl.backup.lvm: snapshot: retry removal if fails with exit == 5 ("Can't remove open logical volume ...")
pvl/backup/lvm.py
--- a/pvl/backup/lvm.py	Thu May 24 14:26:24 2012 +0300
+++ b/pvl/backup/lvm.py	Tue Jun 19 10:24:20 2012 +0300
@@ -17,6 +17,9 @@
 # number of seconds to wait for lvm snapshot to settle after unmount..
 LVM_SNAPSHOT_WAIT   = 5
 
+# number of times to retry removal, due to lvm/udev bug..
+LVM_SNAPSHOT_RETRY  = 5
+
 class LVMError (Exception) :
     pass
 
@@ -67,7 +70,7 @@
         return LVMVolume(self, name)
 
     @contextlib.contextmanager
-    def snapshot (self, base, wait=LVM_SNAPSHOT_WAIT, **opts) :
+    def snapshot (self, base, wait=LVM_SNAPSHOT_WAIT, retry=LVM_SNAPSHOT_RETRY, **opts) :
         """
             A Context Manager for handling an LVMSnapshot.
 
@@ -76,10 +79,11 @@
             with lvm.snapshot(lv) as snapshot : ...
 
                 wait        - wait given interval for the snapshot device to settle before unmounting it
+                retry       - retry removal given number of times
                 **opts      - LVMSnapshot.create() options (e.g. size)
         """
 
-        log.debug("creating snapshot from {base}: wait={wait} {opts}".format(base=base, wait=wait, opts=opts))
+        log.debug("creating snapshot from {base}: wait={wait}, retry={retry} {opts}".format(base=base, wait=wait, retry=retry, opts=opts))
         snapshot = LVMSnapshot.create(self, base, **opts)
 
         try :
@@ -87,17 +91,43 @@
             yield snapshot
 
         finally:
-            if wait :
-                # XXX: there's some timing bug with an umount leaving the LV open, do we need to wait for it to get closed after mount?
-                #       https://bugzilla.redhat.com/show_bug.cgi?id=577798
-                #       some udev event bug, possibly fixed in lvm2 2.02.86?
-                # try to just patiently wait for it to settle down... if this isn't enough, we need some dmremove magic
-                log.debug("cleanup: waiting %.2f seconds for snapshot volume to settle...", wait)
-                time.sleep(wait)
+            # XXX: there's some common udev bug with removing lvm snapshots
+            #       https://bugzilla.redhat.com/show_bug.cgi?id=577798
+            #       http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=618016
+            #       possibly fixed in lvm2 2.02.86?
+            # try to just patiently wait for it to settle down... then retry... if this isn't enough, we need some dmremove magic?
+            while True :
+                # wait..
+                if wait :
+                    log.debug("%s: cleanup: waiting %.2f seconds for snapshot volume to settle...", snapshot, wait)
+                    time.sleep(wait)
 
-            # cleanup
-            log.debug("cleanup: {0}".format(snapshot))
-            snapshot.close()
+                # lvremove
+                try :
+                    log.debug("%s: cleanup", snapshot)
+                    snapshot.close()
+
+                except InvokeError as ex :
+                    if ex.exit != 5 :
+                        # lvremove sez "Can't remove open logical volume ..." -> exit(5);
+                        raise
+                    
+                    # retry counter?
+                    if retry :
+                        log.warn("%s: cleanup: lvremove failed, retrying...", snapshot)
+                        retry -= 1
+
+                        # retry
+                        continue
+
+                    else :
+                        # failed
+                        log.error("%s: cleanup: lvremove failed, aborting...", snapshot)
+                        raise
+
+                else :
+                    # done
+                    break
 
     def __str__ (self) :
         return self.name
@@ -230,10 +260,7 @@
             Remove snapshot volume.
         """
 
-        # XXX: can't deactivate snapshot volume
-        #self.lvm.command('lvchange', name, available='n')
-
-        # XXX: risky!
+        # don't typo me!
         self.lvm.command('lvremove', '-f', self.lvm_path)
 
     def __repr__ (self) :