a rather silly shelve-based tagging thing, committing before I scrap the code and start over with SQLite
import shelve
class TagDB(object):
    """
    Two-way image <-> tag index kept in two shelve databases.

    ``img_tags`` maps an image name to the set of tags attached to it and
    ``tag_imgs`` maps a tag to the set of images carrying it.  Both shelves
    are opened under fixed names ("img_tags" and "tag_imgs"), so every
    instance in a process looks at the same data.

    Instances can be used as context managers; the shelves are closed on
    exit.
    """

    # Query cache for imgs(), keyed by frozenset of tags.  It is deliberately
    # shared by all instances because they all open the same two shelves; it
    # is cleared on every tag() call so results never go stale within this
    # process.  Writes made by other processes are not noticed.
    _imgs_cache = {}

    def __init__(self, read_only=True):
        """
        Open both shelves.

        read_only -- when true (the default) the shelves are opened with flag
                     'r'; otherwise with 'c', which creates them if missing.
        """
        flag = 'r' if read_only else 'c'
        self.img_tags = shelve.open("img_tags", flag)
        try:
            self.tag_imgs = shelve.open("tag_imgs", flag)
        except Exception:
            # Do not leak the first shelf when the second cannot be opened.
            self.img_tags.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def tag(self, img, tag):
        """
        Associate the given image with the given tag

        Updates both directions of the index, drops any cached imgs()
        results and prints the new association.
        """
        # Values are fetched, modified and stored back explicitly: mutating
        # the object returned by a shelf does not persist on its own.
        tags_of_img = self.img_tags.get(img, set())
        tags_of_img.add(tag)
        self.img_tags[img] = tags_of_img

        imgs_of_tag = self.tag_imgs.get(tag, set())
        imgs_of_tag.add(img)
        self.tag_imgs[tag] = imgs_of_tag

        # Any cached query result may now be out of date.
        self._imgs_cache.clear()

        print("%s <-> %s" % (img, tag))

    def imgs(self, tags):
        """
        Get the set of images that have the given set of tags

        tags -- iterable of tag names; order and duplicates do not matter.

        Returns a new set holding the images that carry every one of the
        tags.  With no tags, every known image is returned.  A tag that is
        not in the database matches no images, so the result is empty.
        """
        wanted = frozenset(tags)

        if not wanted:
            return set(self.img_tags.keys())

        cached = self._imgs_cache.get(wanted)
        if cached is not None:
            # Hand out a copy so callers cannot corrupt the cache.
            return set(cached)

        result = None
        for tag in wanted:
            tagged = self.tag_imgs.get(tag)
            if not tagged:
                # Unknown tag (or one without images): nothing can match.
                result = set()
                break
            # Compare against None, not truthiness: an intersection that has
            # already become empty must stay empty.
            if result is None:
                result = set(tagged)
            else:
                result = result & tagged
            if not result:
                break

        self._imgs_cache[wanted] = result
        return set(result)

    def tags(self, tags):
        """
        Get the tags present in the set of images specified by these tags

        tags -- iterable of tag names selecting the images, as for imgs().

        Returns a list of (tag, count) pairs, one for every tag other than
        the given ones that occurs on at least one selected image, where
        count is the number of selected images carrying it.  The list is
        ordered by count, highest first; ties are broken by tag name in
        descending order.

        This is implemented quite inefficiently: the image set of every tag
        in the database is loaded and intersected with the selection.
        """
        selected = frozenset(tags)
        imgs = self.imgs(selected)

        counts = []
        for tag in self.tag_imgs:
            if tag in selected:
                continue
            count = len(self.tag_imgs[tag] & imgs)
            if count:
                counts.append((tag, count))

        counts.sort(key=lambda pair: (pair[1], pair[0]), reverse=True)
        return counts

    def close(self):
        """
        Close both shelves.
        """
        try:
            self.img_tags.close()
        finally:
            # Close the second shelf even when closing the first one fails.
            self.tag_imgs.close()