# HG changeset patch # User Tero Marttila # Date 1244910072 -10800 # Node ID 63e89dc2d6f17b0485e56a0b13fa293fe08098a6 # Parent ef2c1ffdca8f8f882edea8ad5fd583f27d06ebd1 new exif.py seems to work now, although still missing sub-IFDs diff -r ef2c1ffdca8f -r 63e89dc2d6f1 degal/exif.py --- a/degal/exif.py Sat Jun 13 18:34:55 2009 +0300 +++ b/degal/exif.py Sat Jun 13 19:21:12 2009 +0300 @@ -34,7 +34,7 @@ """ # store - self.buf = buffer(obj, offset, size) + self.buf = buffer(obj, *(arg for arg in (offset, size) if arg is not None)) self.offset = offset self.size = size self.prefix = struct_prefix @@ -107,7 +107,7 @@ Represents a single Tag in an IFD """ - def __init__ (self, offset, tag, type, count, value_ref) : + def __init__ (self, offset, tag, type, count, data_raw) : """ Build a Tag with the given binary items from the IFD entry """ @@ -116,14 +116,14 @@ self.tag = tag self.type = type self.count = count - self.value_ref = value_ref + self.data_raw = data_raw # lookup the type for this tag self.type_data = exif_data.FIELD_TYPES.get(type) # unpack it if self.type_data : - self.type_format, self.type_name = self.type_data + self.type_format, self.type_name, self.type_func = self.type_data # lookup the tag data for this tag self.tag_data = exif_data.EXIF_TAGS.get(tag) @@ -131,7 +131,7 @@ # unpack it if self.tag_data : # the EXIF tag name - self.tag_name = tag_data[0] + self.tag_name = self.tag_data[0] # the optional value formatting specification if len(self.tag_data) > 1 : @@ -152,6 +152,19 @@ else : return None + def process_values (self, raw_values) : + """ + Process the given raw values unpacked from the file. + """ + + if self.type_data and self.type_func : + # use the filter func + return self.type_func(raw_values) + + else : + # nada, just leave them + return raw_values + def readable_value (self, value) : """ Convert the given value for this tag into a human-readable string. 
@@ -161,7 +174,7 @@ if self.tag_data and self.tag_value_spec : # map it - return exif_data.tag_value(self.tag_value_spec, value) + return exif_data.map_value(self.tag_value_spec, value) else : # nope... @@ -199,10 +212,10 @@ # read each tag for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02) : # read the tag data - tag, type, count, value_ref = self.pread_struct(offset, 'HHII') + tag, type, count, data_raw = self.pread_struct(offset, 'HHI4s') # yield the new Tag - yield Tag(offset, tag, type, count, value_ref) + yield Tag(offset, tag, type, count, data_raw) class EXIF (Buffer) : """ @@ -231,8 +244,8 @@ offset = self.pread_item(0x04, 'I') while offset : - # create and read the IFD - ifd = IFD(self, offset=offset) + # create and read the IFD, operating on the right sub-buffer + ifd = IFD(self.buf, offset=offset) # yield it yield ifd @@ -242,32 +255,67 @@ __iter__ = iter_ifds - def tag_values (self, tag) : + def tag_data_info (self, tag) : + """ + Calculate the location, format and size of the given tag's data. + + Returns a (fmt, offset, size) tuple. + """ + # unknown tag? + if not tag.type_data : + return None + + # data format + if len(tag.type_format) == 1 : + # let struct handle the count + fmt = "%d%s" % (tag.count, tag.type_format) + + else : + # handle the count ourselves + fmt = tag.type_format * tag.count + + # size of the data + size = self.item_size(fmt) + + # inline or external? + if size > 0x04 : + # point at the external data + offset = self.unpack_item('I', tag.data_raw) + + else : + # point at the inline data + offset = tag.offset + 0x08 + + return fmt, offset, size + + def tag_values_raw (self, tag) : """ Get the raw values for the given tag as a tuple. Returns None if the tag could not be recognized. """ - # unknown tag? - if not tag.type_data : - return None - - # size of the data - data_size = tag.count * self.item_size(tag.type_format) + # find the data + data_info = self.tag_data_info(tag) - # inline or external? 
- if data_size > 0x04 : - # point at the external data - offset = self.unpack_item('I', tag.value_ref) - - else : - # point at the inline data - offset = tag.offset + 0x08 + # not found? + if not data_info : + return None + + # unpack + data_fmt, data_offset, data_size = data_info # read values - return self.pread_struct(offset, "%d%s" % (tag.count, tag.type_format)) + return self.pread_struct(data_offset, data_fmt) + def tag_values (self, tag) : + """ + Gets the processed values for the given tag as a list. + """ + + # read + process + return tag.process_values(self.tag_values_raw(tag)) + def tag_value (self, tag) : """ Return the human-readable string value for the given tag. @@ -301,7 +349,7 @@ offset = file.tell() # mmap the region for the EXIF data - buffer = mmap_region(file, length) + buffer = mmap_buffer(file, length) # read byte-order header byte_order = file.read(2) @@ -349,7 +397,7 @@ raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type)) # special cases for no data - if marker_byte in JPEG_NOSIZE_MARKERS : + if marker_type in JPEG_NOSIZE_MARKERS : size = 0 else : @@ -393,7 +441,7 @@ # look for APP1 marker (0xE1) with EXIF signature elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER: # skipped the initial Exif marker signature - return size - JPEG_EXIF_HEADER + return size - len(JPEG_EXIF_HEADER) # nothing return None @@ -409,7 +457,7 @@ size = jpeg_find_exif(file) # not found? 
- if not res : + if not size : # nothing return @@ -450,22 +498,36 @@ Dump all tags from the given EXIF object to stdout """ - print "EXIF offset=%d, size=%d:" % (exif.offset, exif.size) + print "EXIF offset=%#08x, size=%d:" % (exif.offset, exif.size) for i, ifd in enumerate(exif.iter_ifds()) : - print "\tIFD %d, offset=%d, size=%d, count=%d, next=%d:" % (i, ifd.offset, ifd.size, ifd.count, ifd.next_offset) + print "\tIFD:%d offset=%#04x(%#08x), count=%d, next=%d:" % ( + i, + ifd.offset, ifd.offset + exif.offset, + ifd.count, + ifd.next_offset + ) - for i, tag in enumerate(exif.iter_tags()) : - print "\t\tTag %d, offset=%d, tag=%d/%s, type=%d/%s, count=%d:" % ( + for i, tag in enumerate(ifd.iter_tags()) : + data_info = exif.tag_data_info(tag) + + if data_info : + data_fmt, data_offset, data_size = data_info + + else : + data_fmt = data_offset = data_size = None + + print "\t\tTag:%d offset=%#04x(%#08x), tag=%d/%s, type=%d/%s, count=%d, fmt=%s, offset=%#04x, size=%s:" % ( i, - tag.offset, - tag.code, tag.name or '???', + tag.offset, tag.offset + exif.offset, + tag.tag, tag.name or '???', tag.type, tag.type_name if tag.type_data else '???', tag.count, + data_fmt, data_offset, data_size, ) for i, value in enumerate(exif.tag_values(tag)) : - print "\t\t\t%02d: %s" % (i, tag.readable_value(value)) + print "\t\t\t%02d: %r -> %s" % (i, value, tag.readable_value(value)) def main (path) : """ diff -r ef2c1ffdca8f -r 63e89dc2d6f1 degal/exif_data.py --- a/degal/exif_data.py Sat Jun 13 18:34:55 2009 +0300 +++ b/degal/exif_data.py Sat Jun 13 19:21:12 2009 +0300 @@ -43,6 +43,67 @@ """ +import decimal, itertools + +def filter_ascii (values) : + """ + Default post-filter for ASCII values. 
+ + This takes a single item of string data, splits it up into strings by ASCII-NUL, and trims the individual strings + """ + + return [string.rstrip() for string in values[0].split('\x00') if string] + + +def build_ratio (num, denom) : + """ + Builds a Decimal ratio out of the given numerator and denominator + """ + + return decimal.Decimal(num) / decimal.Decimal(denom) + +def filter_ratio (values) : + """ + Default post-filter for Ratio values. + + This takes the pairs of numerator/denominator values and builds Decimals out of them + """ + + return [build_ratio(values[i], values[i + 1]) for i in xrange(0, len(values), 2)] + + +# IFD Tag type information, indexed by code +# { type_code: (type_fmt, name, filter_func) } +# +# type_fmt's that are one char will be prefixed with the count for use with struct.unpack, those with more chars will +# be repeated as many times for use with struct.unpack. +FIELD_TYPES = { +# 0x0000: (None, 'Proprietary' ), # ??? no such type + 0x0001: ('B', 'Byte', None ), + 0x0002: ('s', 'ASCII', filter_ascii ), + 0x0003: ('H', 'Short', None ), + 0x0004: ('L', 'Long', None ), + 0x0005: ('LL', 'Ratio', filter_ratio ), + 0x0006: ('b', 'Signed Byte', None ), + 0x0007: ('s', 'Undefined', None ), + 0x0008: ('h', 'Signed Short', None ), + 0x0009: ('l', 'Signed Long', None ), + 0x000A: ('ll', 'Signed Ratio', filter_ratio ), +} + +def map_value (spec, value) : + """ + Map the given tag value to a printable string using the given value spec. + """ + + if callable(spec): + # call mapping function + return spec(value) + + else: + return spec.get(value, repr(value)) + + def make_string (seq): """ Filter a string to strip out non-printing chars @@ -73,33 +134,6 @@ ## allows JIS and Unicode. return make_string(seq) -# IFD Tag type information, indexed by code -# { type_code: (type_fmt, name) } -FIELD_TYPES = { -# 0x0000: (None, 'Proprietary' ), # ??? 
no such type - 0x0001: ('B', 'Byte' ), - 0x0002: ('s', 'ASCII' ), - 0x0003: ('H', 'Short' ), - 0x0004: ('L', 'Long' ), - 0x0005: ('LL', 'Ratio' ), - 0x0006: ('b', 'Signed Byte' ), - 0x0007: ('c', 'Undefined' ), - 0x0008: ('h', 'Signed Short' ), - 0x0009: ('l', 'Signed Long' ), - 0x000A: ('ll', 'Signed Ratio' ), -} - -def map_value (spec, value) : - """ - Map the given tag value to a printable string using the given value spec. - """ - - if callable(spec): - # call mapping function - return spec(value) - - else: - return spec.get(value, repr(value)) # dictionary of main EXIF tag names # first element of tuple is tag name, optional second element is