updated shorturls.py to write new shorturls to the db, also adding support for dir-shorturls
author terom
Thu, 31 Jan 2008 17:28:02 +0000
changeset 26 81d6679d50d0
parent 25 4b3cf12848c2
child 27 301d738b1181
updated shorturls.py to write new shorturls to the db, also adding support for dir-shorturls
lib/db.py
lib/folder.py
lib/shorturl.py
lib/utils.py
scripts/migrate_shorturls.py
--- a/lib/db.py	Sun Jan 20 01:52:00 2008 +0000
+++ b/lib/db.py	Thu Jan 31 17:28:02 2008 +0000
@@ -34,13 +34,33 @@
 
     return c
 
+def insert (expr, *args) :
+    return execute_commit(expr, *args).lastrowid
+
+def insert_many (cb, expr, iter) :
+    """
+        Perform an executemany with the given iterator (which must yield (cb_val, args) tuples), calling the given callback with the args (cb_val, row_id)
+    """
+
+    c = conn.cursor()
+
+    c.executemany(expr, _lastrowid_adapter(c, iter, cb))
+
+    return commit(c)
+
+def _lastrowid_adapter (c, iter, cb) :
+    for val, args in iter :
+        yield args
+
+        cb(val, c.lastrowid)
+
 def commit (cursor) :
     try :
         cursor.execute("COMMIT")
     except sqlite3.OperationalError :
         pass    # ffs. INSERT just doesn't do anything otherwise
 
-    return cursor.rowcount
+    return cursor
 
 def execute_commit (expr, *args) :
     return commit(execute(expr, *args))
@@ -51,10 +71,8 @@
 select = execute
 
 delete = execute_commit
-insert = execute_commit
 
 delete_many = execute_commit_many
-insert_many = execute_commit_many
 
 cursor = conn.cursor
 
--- a/lib/folder.py	Sun Jan 20 01:52:00 2008 +0000
+++ b/lib/folder.py	Thu Jan 31 17:28:02 2008 +0000
@@ -160,24 +160,24 @@
                 # add to the sorted images list
                 self.sorted_images.append(img)
                 
-            # figure out our title/ descr. Must be done before our parent dir is rendered (self.title)
-            title_path = self.pathFor(settings.TITLE_FILE)
-            
-            self.title, self.descr = utils.readTitleDescr(title_path)
+        # figure out our title/ descr. Must be done before our parent dir is rendered (self.title)
+        title_path = self.pathFor(settings.TITLE_FILE)
+        
+        self.title, self.descr = utils.readTitleDescr(title_path)
+        
+        # default title for the root dir
+        if self.title :
+            self.alive = True
+            pass # use what was in the title file
             
-            # default title for the root dir
-            if self.title :
-                self.alive = True
-                pass # use what was in the title file
-                
-            elif not self.parent :
-                self.title = 'Index'
+        elif not self.parent :
+            self.title = 'Index'
 
-            else :
-                self.title = self.name
-            
-            if self.descr :
-                self.alive = True
+        else :
+            self.title = self.name
+        
+        if self.descr :
+            self.alive = True
 
         return self.alive
 
@@ -274,13 +274,16 @@
 
             if pagination_required :
                 render.info("Index split into %d pages of %d images each", len(pages), settings.IMAGE_COUNT)
-            
+            elif not pages :
+                render.info("Dir with no images, render for subdirs")
+                pages = [[]]
+
             for cur_page, images in enumerate(pages) :
                 if pagination_required and cur_page > 0 :
                     shorturl = "%s/%s" % (self.shorturl_code, cur_page+1)
                 else :
                     shorturl = self.shorturl_code
-
+                
                 # render to index.html
                 gallery_tpl.render_to(self.pathFor(url_for_page(cur_page)), 
                     stylesheet_url               = self.inRoot('style.css'),
--- a/lib/shorturl.py	Sun Jan 20 01:52:00 2008 +0000
+++ b/lib/shorturl.py	Thu Jan 31 17:28:02 2008 +0000
@@ -25,7 +25,7 @@
 
 from log import index
 
-import utils, db
+import utils, db, helpers, folder, image
 
 def int2key (id) :
     """
@@ -41,7 +41,8 @@
 
 def key2int (key) :
     # base64 ignores extra padding, but if it doesn't, it's (4 - len%4), if len%4 != 0
-    bytes = base64.b64decode(key + '='*6, '-_')
+    # and it breaks on unicode strings
+    bytes = base64.b64decode(str(key + '='*6), '-_')
     
     type = {
         1: 'B',
@@ -51,66 +52,6 @@
 
     return struct.unpack(type, bytes)[0]
 
-def updateDB (root) :
-    """
-        DeGAL <= 0.2 used a simple key => path mapping, but now we use
-        something more structured, key => (type, dirpath, fname), where
-
-        type    - one of 'img', 'dir'
-        dirpath - the path to the directory, e.g. '.', './foobar', './foobar/quux'
-        fname   - the filename, one of '', 'DSC9839.JPG', 'this.png', etc.
-    """
-
-    db = shelve.open('shorturls2', 'c', writeback=True)
-    
-    id = db.get('_id', 1)
-
-    dirqueue = [root]
-
-    # dict of path -> obj
-    paths = {}
-
-    index.info("Processing ShortURLs...")
-
-    while dirqueue :
-        dir = dirqueue.pop(0)
-
-        dirqueue.extend(dir.subdirs.itervalues())
-
-        if dir.alive :
-            paths[dir.path] = dir
-
-        for img in dir.images.itervalues() :
-            paths[img.path] = img
-
-    for key in db.keys() :
-        if key.startswith('_') :
-            continue
-
-        type, dirpath, fname = db[key]
-        
-        path = os.path.join(dirpath, fname).rstrip(os.sep)
-
-        try :
-            paths.pop(path).shorturl_code = key
-            index.debug("Code for `%s' is %s", path, key)
-
-        except KeyError :
-            index.debug("Path `%s' in DB does not exist?", path)
-
-    for obj in paths.itervalues() :
-        key = int2key(id)
-        id += 1
-        
-        index.info("Alloc code `%s' for `%s'", key, obj.html_path)
-
-        obj.shorturl_code = key
-
-        db[key] = obj.getObjInfo()
-
-    db['_id'] = id
-    db.close()
-
 class DB (object) :
     def __init__ (self, read_only=True) :
         self.db = shelve.open('shorturls2', read_only and 'r' or 'c')
@@ -158,9 +99,21 @@
         return ret
 
 def html_path (key, index=None) :
-    dir, fname = image_info(key)
+    dir, fname = node_info(key)
 
-    return utils.url(dir, fname + '.html')
+    if fname :
+        return utils.url(dir, fname + '.html')
+    else :
+        return utils.url(dir, helpers.url_for_page(index or 0))
+
+def node_info (key) :
+    res = db.select("""SELECT dirpath, filename FROM nodes WHERE id=?""", key2int(key)).fetchone()
+    
+    if res :
+        return res
+
+    else :
+        raise KeyError(key)
 
 def image_info (key) :
     res = db.select("""SELECT dirpath, filename FROM images WHERE id=?""", key2int(key)).fetchone()
@@ -181,3 +134,75 @@
     else :
         raise KeyError(keys)
 
+def _got_obj_key (obj, id) :
+    key = int2key(id)
+
+    obj.shorturl_code = key
+
+    if isinstance(obj, folder.Folder) :
+        dir, fname = utils.strip_path(obj.path), ''
+    elif isinstance(obj, image.Image) :
+        dir, fname = utils.strip_path(obj.dir.path), obj.name
+    else :
+        assert(False, "%r %r" % (obj, id))
+
+    index.info("img %50s %15s = %d %s", dir, fname, id, key)
+
+def updateDB (root) :
+    """
+        Update the SQL database
+
+        type    - one of 'img', 'dir'
+        dirpath - the path to the directory, e.g. '.', './foobar', './foobar/quux'
+        fname   - the filename, one of '', 'DSC9839.JPG', 'this.png', etc.
+    """
+
+    dirqueue = [root]
+
+    # dict of (dir, fname) -> obj
+    paths = {}
+
+    index.info("Processing ShortURLs...")
+
+    while dirqueue :
+        dir = dirqueue.pop(0)
+
+        dirqueue.extend(dir.subdirs.itervalues())
+
+        if dir.alive :
+            pathtuple = (utils.strip_path(dir.path), '')
+            
+            index.debug("dir %50s", pathtuple[0])
+
+            paths[pathtuple] = dir
+
+        for img in dir.images.itervalues() :
+            pathtuple = (utils.strip_path(img.dir.path), img.name)
+            
+            index.debug("img %50s %15s", *pathtuple)
+
+            paths[pathtuple] = img
+    
+    print "%d nodes:" % (len(paths))
+
+    for (id, dir, fname) in db.select("SELECT id, dirpath, filename FROM nodes") :
+        try :
+            obj = paths.pop((dir, fname))
+            key = int2key(id)
+
+            obj.shorturl_code = key
+
+            index.debug("%s %50s %15s -> %d %s", dir and "img" or "dir", dir, fname, id, key)
+        
+        except KeyError :
+            pass
+#            index.warning("non-existant node (%d, %s, %s) in db", id, dir, fname)
+    
+    print "%d NEW nodes:" % (len(paths))
+
+    db.insert_many(
+        _got_obj_key,
+        "INSERT INTO nodes (dirpath, filename) VALUES (?, ?)",
+        ((obj, (path, fname)) for ((path, fname), obj) in paths.iteritems())
+    )
+
--- a/lib/utils.py	Sun Jan 20 01:52:00 2008 +0000
+++ b/lib/utils.py	Thu Jan 31 17:28:02 2008 +0000
@@ -68,3 +68,7 @@
 
 def path_join (*parts) :
     return os.path.join(*[part for part in parts if part is not None])
+
+def strip_path (path) :
+    return path.lstrip('.').lstrip('/')
+
--- a/scripts/migrate_shorturls.py	Sun Jan 20 01:52:00 2008 +0000
+++ b/scripts/migrate_shorturls.py	Thu Jan 31 17:28:02 2008 +0000
@@ -19,33 +19,36 @@
 
         type, dirpath, fname = value
 
-        if type == "img" :
-            id = shorturl.key2int(key)
+        id = shorturl.key2int(key)
+        dirpath = dirpath.lstrip('.').lstrip('/')
 
-            dirpath = dirpath.lstrip('.').lstrip('/')
+        if type == "img" :
+            print "img"
+            continue    # already imported images
 
-            print "%6d %50s %10s" % (id, dirpath, fname)
+            print "img %6d %50s %10s" % (id, dirpath, fname)
 
             yield id, dirpath, fname
 
-            count += 1
+        else :
 
-            if count % 500 == 0 :
-                print count
-        else :
-            print "dir"
+            print "dir %6d %50s" % (id, dirpath)
+
+            yield id, dirpath, ''
+
+        count += 1
+
+        if count % 500 == 0 :
+            print count
+
 
 print "Starting import..."
 
-c = db.cursor()
-
-c.executemany("""
-    INSERT INTO images VALUES (?, ?, ?)
+c = db.insert_many("""
+    INSERT OR IGNORE INTO nodes VALUES (?, ?, ?)
 """, gen())
 
 print "Done!"
 
-print "%d rows affected" % c.rowcount
+print "%d rows affected" % c
 
-c.execute("SELECT id FROM images").fetchall()
-