# HG changeset patch
# User Tero Marttila <terom@fixme.fi>
# Date 1244910072 -10800
# Node ID 63e89dc2d6f17b0485e56a0b13fa293fe08098a6
# Parent  ef2c1ffdca8f8f882edea8ad5fd583f27d06ebd1
new exif.py seems to work now, although still missing sub-IFDs

diff -r ef2c1ffdca8f -r 63e89dc2d6f1 degal/exif.py
--- a/degal/exif.py	Sat Jun 13 18:34:55 2009 +0300
+++ b/degal/exif.py	Sat Jun 13 19:21:12 2009 +0300
@@ -34,7 +34,7 @@
         """
 
         # store
-        self.buf = buffer(obj, offset, size)
+        self.buf = buffer(obj, *(arg for arg in (offset, size) if arg is not None))
         self.offset = offset
         self.size = size
         self.prefix = struct_prefix
@@ -107,7 +107,7 @@
         Represents a single Tag in an IFD
     """
 
-    def __init__ (self, offset, tag, type, count, value_ref) :
+    def __init__ (self, offset, tag, type, count, data_raw) :
         """
             Build a Tag with the given binary items from the IFD entry
         """
@@ -116,14 +116,14 @@
         self.tag = tag
         self.type = type
         self.count = count
-        self.value_ref = value_ref
+        self.data_raw = data_raw
         
         # lookup the type for this tag
         self.type_data = exif_data.FIELD_TYPES.get(type)
 
         # unpack it
         if self.type_data :
-            self.type_format, self.type_name = self.type_data
+            self.type_format, self.type_name, self.type_func = self.type_data
     
         # lookup the tag data for this tag
         self.tag_data = exif_data.EXIF_TAGS.get(tag)
@@ -131,7 +131,7 @@
         # unpack it
         if self.tag_data :
             # the EXIF tag name
-            self.tag_name = tag_data[0]
+            self.tag_name = self.tag_data[0]
             
             # the optional value formatting specification
             if len(self.tag_data) > 1 :
@@ -152,6 +152,19 @@
         else :
             return None
     
+    def process_values (self, raw_values) :
+        """
+            Process the given raw values unpacked from the file.
+        """
+
+        if self.type_data and self.type_func :
+            # use the filter func
+            return self.type_func(raw_values)
+
+        else :
+            # nada, just leave them
+            return raw_values
+
     def readable_value (self, value) :
         """
             Convert the given value for this tag into a human-readable string.
@@ -161,7 +174,7 @@
 
         if self.tag_data and self.tag_value_spec :
             # map it
-            return exif_data.tag_value(self.tag_value_spec, value)
+            return exif_data.map_value(self.tag_value_spec, value)
 
         else :
             # nope...
@@ -199,10 +212,10 @@
         # read each tag
         for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02) :
             # read the tag data
-            tag, type, count, value_ref = self.pread_struct(offset, 'HHII')
+            tag, type, count, data_raw = self.pread_struct(offset, 'HHI4s')
             
             # yield the new Tag
-            yield Tag(offset, tag, type, count, value_ref)
+            yield Tag(offset, tag, type, count, data_raw)
 
 class EXIF (Buffer) :
     """
@@ -231,8 +244,8 @@
         offset = self.pread_item(0x04, 'I')
 
         while offset :
-            # create and read the IFD
-            ifd = IFD(self, offset=offset)
+            # create and read the IFD, operating on the right sub-buffer
+            ifd = IFD(self.buf, offset=offset)
 
             # yield it
             yield ifd
@@ -242,32 +255,67 @@
     
     __iter__ = iter_ifds
     
-    def tag_values (self, tag) :
+    def tag_data_info (self, tag) :
+        """
+            Calculate the location, format and size of the given tag's data.
+
+            Returns a (fmt, offset, size) tuple, or None if the tag's field type is not recognized.
+        """
+        # unknown tag?
+        if not tag.type_data :
+            return None
+
+        # data format
+        if len(tag.type_format) == 1 :
+            # let struct handle the count
+            fmt = "%d%s" % (tag.count, tag.type_format)
+
+        else :
+            # handle the count ourselves
+            fmt = tag.type_format * tag.count
+
+        # size of the data
+        size = self.item_size(fmt)
+
+        # inline or external?
+        if size > 0x04 :
+            # point at the external data
+            offset = self.unpack_item('I', tag.data_raw)
+
+        else :
+            # point at the inline data
+            offset = tag.offset + 0x08
+        
+        return fmt, offset, size
+
+    def tag_values_raw (self, tag) :
         """
             Get the raw values for the given tag as a tuple.
 
             Returns None if the tag could not be recognized.
         """
 
-        # unknown tag?
-        if not tag.type_data :
-            return None
-
-        # size of the data
-        data_size = tag.count * self.item_size(tag.type_format)
+        # find the data
+        data_info = self.tag_data_info(tag)
 
-        # inline or external?
-        if data_size > 0x04 :
-            # point at the external data
-            offset = self.unpack_item('I', tag.value_ref)
-
-        else :
-            # point at the inline data
-            offset = tag.offset + 0x08
+        # not found?
+        if not data_info :
+            return None
+        
+        # unpack
+        data_fmt, data_offset, data_size = data_info
         
         # read values
-        return self.pread_struct(offset, "%d%s" % (tag.count, tag.type_format))
+        return self.pread_struct(data_offset, data_fmt)
     
+    def tag_values (self, tag) :
+        """
+            Gets the processed values for the given tag as a sequence, or None if the tag could not be recognized.
+        """
+
+        # read + process
+        return tag.process_values(self.tag_values_raw(tag))
+
     def tag_value (self, tag) :
         """
             Return the human-readable string value for the given tag.
@@ -301,7 +349,7 @@
     offset = file.tell()
 
     # mmap the region for the EXIF data
-    buffer = mmap_region(file, length)
+    buffer = mmap_buffer(file, length)
 
     # read byte-order header
     byte_order = file.read(2)
@@ -349,7 +397,7 @@
             raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))
 
         # special cases for no data
-        if marker_byte in JPEG_NOSIZE_MARKERS :
+        if marker_type in JPEG_NOSIZE_MARKERS :
             size = 0
 
         else :
@@ -393,7 +441,7 @@
         # look for APP1 marker (0xE1) with EXIF signature
         elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER:
             # skipped the initial Exif marker signature
-            return size - JPEG_EXIF_HEADER
+            return size - len(JPEG_EXIF_HEADER)
 
     # nothing
     return None
@@ -409,7 +457,7 @@
     size = jpeg_find_exif(file)
     
     # not found?
-    if not res :
+    if not size :
         # nothing
         return
 
@@ -450,22 +498,36 @@
         Dump all tags from the given EXIF object to stdout
     """
 
-    print "EXIF offset=%d, size=%d:" % (exif.offset, exif.size)
+    print "EXIF offset=%#08x, size=%d:" % (exif.offset, exif.size)
 
     for i, ifd in enumerate(exif.iter_ifds()) :
-        print "\tIFD %d, offset=%d, size=%d, count=%d, next=%d:" % (i, ifd.offset, ifd.size, ifd.count, ifd.next_offset)
+        print "\tIFD:%d offset=%#04x(%#08x), count=%d, next=%d:" % (
+            i, 
+            ifd.offset, ifd.offset + exif.offset,
+            ifd.count, 
+            ifd.next_offset
+        )
         
-        for i, tag in enumerate(exif.iter_tags()) :
-            print "\t\tTag %d, offset=%d, tag=%d/%s, type=%d/%s, count=%d:" % (
+        for i, tag in enumerate(ifd.iter_tags()) :
+            data_info = exif.tag_data_info(tag)
+
+            if data_info :
+                data_fmt, data_offset, data_size = data_info
+
+            else :
+                data_fmt = data_offset = data_size = None
+
+            print "\t\tTag:%d offset=%#04x(%#08x), tag=%d/%s, type=%d/%s, count=%d, fmt=%s, offset=%#04x, size=%s:" % (
                 i, 
-                tag.offset,
-                tag.code, tag.name or '???',
+                tag.offset, tag.offset + exif.offset,
+                tag.tag, tag.name or '???',
                 tag.type, tag.type_name if tag.type_data else '???',
                 tag.count,
+                data_fmt, data_offset, data_size,
             )
             
             for i, value in enumerate(exif.tag_values(tag)) :
-                print "\t\t\t%02d: %s" % (i, tag.readable_value(value))
+                print "\t\t\t%02d: %r -> %s" % (i, value, tag.readable_value(value))
 
 def main (path) :
     """
diff -r ef2c1ffdca8f -r 63e89dc2d6f1 degal/exif_data.py
--- a/degal/exif_data.py	Sat Jun 13 18:34:55 2009 +0300
+++ b/degal/exif_data.py	Sat Jun 13 19:21:12 2009 +0300
@@ -43,6 +43,67 @@
 
 """
 
+import decimal, itertools
+
+def filter_ascii (values) :
+    """
+        Default post-filter for ASCII values.
+
+        This takes a single item of string data, splits it up into strings by ASCII-NUL, and trims the individual strings
+    """
+
+    return [string.rstrip() for string in values[0].split('\x00') if string]
+
+
+def build_ratio (num, denom) :
+    """
+        Builds a Decimal ratio out of the given numerator and denominator
+    """
+
+    return decimal.Decimal(num) / decimal.Decimal(denom)
+
+def filter_ratio (values) :
+    """
+        Default post-filter for Ratio values.
+
+        This takes the pairs of numerator/denominator values and builds Decimals out of them
+    """
+
+    return [build_ratio(values[i], values[i + 1]) for i in xrange(0, len(values), 2)]
+
+
+# IFD Tag type information, indexed by code
+#  { type_code: (type_fmt, name, filter_func) }
+#
+# A one-char type_fmt is prefixed with the tag's count for struct.unpack (e.g. '3H'); a longer type_fmt is instead
+# repeated count times (e.g. 'LL' with count 2 becomes 'LLLL').
+FIELD_TYPES = {
+#    0x0000: (None,  'Proprietary'   ), # ??? no such type
+    0x0001: ('B',   'Byte',         None            ),
+    0x0002: ('s',   'ASCII',        filter_ascii    ),
+    0x0003: ('H',   'Short',        None            ),
+    0x0004: ('L',   'Long',         None            ),
+    0x0005: ('LL',  'Ratio',        filter_ratio    ),
+    0x0006: ('b',   'Signed Byte',  None            ),
+    0x0007: ('s',   'Undefined',    None            ),
+    0x0008: ('h',   'Signed Short', None            ),
+    0x0009: ('l',   'Signed Long',  None            ),
+    0x000A: ('ll',  'Signed Ratio', filter_ratio    ),
+}
+
+def map_value (spec, value) :
+    """
+        Map the given tag value to a printable string using the given value spec.
+    """
+    
+    if callable(spec):
+        # call mapping function
+        return spec(value)
+
+    else:
+        return spec.get(value, repr(value))
+
+
 def make_string (seq):
     """
         Filter a string to strip out non-printing chars
@@ -73,33 +134,6 @@
     ## allows JIS and Unicode.
     return make_string(seq)
 
-# IFD Tag type information, indexed by code
-#  { type_code: (type_fmt, name) }
-FIELD_TYPES = {
-#    0x0000: (None,  'Proprietary'   ), # ??? no such type
-    0x0001: ('B',   'Byte'          ),
-    0x0002: ('s',   'ASCII'         ),
-    0x0003: ('H',   'Short'         ),
-    0x0004: ('L',   'Long'          ),
-    0x0005: ('LL',  'Ratio'         ),
-    0x0006: ('b',   'Signed Byte'   ),
-    0x0007: ('c',   'Undefined'     ),
-    0x0008: ('h',   'Signed Short'  ),
-    0x0009: ('l',   'Signed Long'   ),
-    0x000A: ('ll',  'Signed Ratio'  ),
-}
-
-def map_value (spec, value) :
-    """
-        Map the given tag value to a printable string using the given value spec.
-    """
-    
-    if callable(spec):
-        # call mapping function
-        return spec(value)
-
-    else:
-        return spec.get(value, repr(value))
 
 # dictionary of main EXIF tag names
 # first element of tuple is tag name, optional second element is