degal/exif.py
branchnew-exif
changeset 103 63e89dc2d6f1
parent 102 ef2c1ffdca8f
child 104 6afe59e5ffae
equal deleted inserted replaced
102:ef2c1ffdca8f 103:63e89dc2d6f1
    32             The endiannes is given in the form of a struct-module prefix, which should be one of '<' or '>'.
    32             The endiannes is given in the form of a struct-module prefix, which should be one of '<' or '>'.
    33             Standard size/alignment are assumed.
    33             Standard size/alignment are assumed.
    34         """
    34         """
    35 
    35 
    36         # store
    36         # store
    37         self.buf = buffer(obj, offset, size)
    37         self.buf = buffer(obj, *(arg for arg in (offset, size) if arg is not None))
    38         self.offset = offset
    38         self.offset = offset
    39         self.size = size
    39         self.size = size
    40         self.prefix = struct_prefix
    40         self.prefix = struct_prefix
    41     
    41     
    42     def subregion (self, offset, length=None) :
    42     def subregion (self, offset, length=None) :
   105 class Tag (object) :
   105 class Tag (object) :
   106     """
   106     """
   107         Represents a single Tag in an IFD
   107         Represents a single Tag in an IFD
   108     """
   108     """
   109 
   109 
   110     def __init__ (self, offset, tag, type, count, value_ref) :
   110     def __init__ (self, offset, tag, type, count, data_raw) :
   111         """
   111         """
   112             Build a Tag with the given binary items from the IFD entry
   112             Build a Tag with the given binary items from the IFD entry
   113         """
   113         """
   114         
   114         
   115         self.offset = offset
   115         self.offset = offset
   116         self.tag = tag
   116         self.tag = tag
   117         self.type = type
   117         self.type = type
   118         self.count = count
   118         self.count = count
   119         self.value_ref = value_ref
   119         self.data_raw = data_raw
   120         
   120         
   121         # lookup the type for this tag
   121         # lookup the type for this tag
   122         self.type_data = exif_data.FIELD_TYPES.get(type)
   122         self.type_data = exif_data.FIELD_TYPES.get(type)
   123 
   123 
   124         # unpack it
   124         # unpack it
   125         if self.type_data :
   125         if self.type_data :
   126             self.type_format, self.type_name = self.type_data
   126             self.type_format, self.type_name, self.type_func = self.type_data
   127     
   127     
   128         # lookup the tag data for this tag
   128         # lookup the tag data for this tag
   129         self.tag_data = exif_data.EXIF_TAGS.get(tag)
   129         self.tag_data = exif_data.EXIF_TAGS.get(tag)
   130         
   130         
   131         # unpack it
   131         # unpack it
   132         if self.tag_data :
   132         if self.tag_data :
   133             # the EXIF tag name
   133             # the EXIF tag name
   134             self.tag_name = tag_data[0]
   134             self.tag_name = self.tag_data[0]
   135             
   135             
   136             # the optional value formatting specification
   136             # the optional value formatting specification
   137             if len(self.tag_data) > 1 :
   137             if len(self.tag_data) > 1 :
   138                 self.tag_value_spec = self.tag_data[1]
   138                 self.tag_value_spec = self.tag_data[1]
   139 
   139 
   150             return self.tag_name
   150             return self.tag_name
   151 
   151 
   152         else :
   152         else :
   153             return None
   153             return None
   154     
   154     
       
   155     def process_values (self, raw_values) :
       
   156         """
       
   157             Process the given raw values unpacked from the file.
       
   158         """
       
   159 
       
   160         if self.type_data and self.type_func :
       
   161             # use the filter func
       
   162             return self.type_func(raw_values)
       
   163 
       
   164         else :
       
   165             # nada, just leave them
       
   166             return raw_values
       
   167 
   155     def readable_value (self, value) :
   168     def readable_value (self, value) :
   156         """
   169         """
   157             Convert the given value for this tag into a human-readable string.
   170             Convert the given value for this tag into a human-readable string.
   158 
   171 
   159             Returns the value itself by default.
   172             Returns the value itself by default.
   160         """
   173         """
   161 
   174 
   162         if self.tag_data and self.tag_value_spec :
   175         if self.tag_data and self.tag_value_spec :
   163             # map it
   176             # map it
   164             return exif_data.tag_value(self.tag_value_spec, value)
   177             return exif_data.map_value(self.tag_value_spec, value)
   165 
   178 
   166         else :
   179         else :
   167             # nope...
   180             # nope...
   168             return value
   181             return value
   169 
   182 
   197         """
   210         """
   198         
   211         
   199         # read each tag
   212         # read each tag
   200         for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02) :
   213         for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02) :
   201             # read the tag data
   214             # read the tag data
   202             tag, type, count, value_ref = self.pread_struct(offset, 'HHII')
   215             tag, type, count, data_raw = self.pread_struct(offset, 'HHI4s')
   203             
   216             
   204             # yield the new Tag
   217             # yield the new Tag
   205             yield Tag(offset, tag, type, count, value_ref)
   218             yield Tag(offset, tag, type, count, data_raw)
   206 
   219 
   207 class EXIF (Buffer) :
   220 class EXIF (Buffer) :
   208     """
   221     """
   209         Represents the EXIF data embedded in some image file in the form of a Region.
   222         Represents the EXIF data embedded in some image file in the form of a Region.
   210     """
   223     """
   229 
   242 
   230         # starting offset
   243         # starting offset
   231         offset = self.pread_item(0x04, 'I')
   244         offset = self.pread_item(0x04, 'I')
   232 
   245 
   233         while offset :
   246         while offset :
   234             # create and read the IFD
   247             # create and read the IFD, operating on the right sub-buffer
   235             ifd = IFD(self, offset=offset)
   248             ifd = IFD(self.buf, offset=offset)
   236 
   249 
   237             # yield it
   250             # yield it
   238             yield ifd
   251             yield ifd
   239 
   252 
   240             # skip to next offset
   253             # skip to next offset
   241             offset = ifd.next_offset
   254             offset = ifd.next_offset
   242     
   255     
   243     __iter__ = iter_ifds
   256     __iter__ = iter_ifds
   244     
   257     
   245     def tag_values (self, tag) :
   258     def tag_data_info (self, tag) :
   246         """
   259         """
   247             Get the raw values for the given tag as a tuple.
   260             Calculate the location, format and size of the given tag's data.
   248 
   261 
   249             Returns None if the tag could not be recognized.
   262             Returns a (fmt, offset, size) tuple.
   250         """
   263         """
   251 
       
   252         # unknown tag?
   264         # unknown tag?
   253         if not tag.type_data :
   265         if not tag.type_data :
   254             return None
   266             return None
   255 
   267 
       
   268         # data format
       
   269         if len(tag.type_format) == 1 :
       
   270             # let struct handle the count
       
   271             fmt = "%d%s" % (tag.count, tag.type_format)
       
   272 
       
   273         else :
       
   274             # handle the count ourselves
       
   275             fmt = tag.type_format * tag.count
       
   276 
   256         # size of the data
   277         # size of the data
   257         data_size = tag.count * self.item_size(tag.type_format)
   278         size = self.item_size(fmt)
   258 
   279 
   259         # inline or external?
   280         # inline or external?
   260         if data_size > 0x04 :
   281         if size > 0x04 :
   261             # point at the external data
   282             # point at the external data
   262             offset = self.unpack_item('I', tag.value_ref)
   283             offset = self.unpack_item('I', tag.data_raw)
   263 
   284 
   264         else :
   285         else :
   265             # point at the inline data
   286             # point at the inline data
   266             offset = tag.offset + 0x08
   287             offset = tag.offset + 0x08
   267         
   288         
       
   289         return fmt, offset, size
       
   290 
       
   291     def tag_values_raw (self, tag) :
       
   292         """
       
   293             Get the raw values for the given tag as a tuple.
       
   294 
       
   295             Returns None if the tag could not be recognized.
       
   296         """
       
   297 
       
   298         # find the data
       
   299         data_info = self.tag_data_info(tag)
       
   300 
       
   301         # not found?
       
   302         if not data_info :
       
   303             return None
       
   304         
       
   305         # unpack
       
   306         data_fmt, data_offset, data_size = data_info
       
   307         
   268         # read values
   308         # read values
   269         return self.pread_struct(offset, "%d%s" % (tag.count, tag.type_format))
   309         return self.pread_struct(data_offset, data_fmt)
   270     
   310     
       
   311     def tag_values (self, tag) :
       
   312         """
       
   313             Gets the processed values for the given tag as a list.
       
   314         """
       
   315 
       
   316         # read + process
       
   317         return tag.process_values(self.tag_values_raw(tag))
       
   318 
   271     def tag_value (self, tag) :
   319     def tag_value (self, tag) :
   272         """
   320         """
   273             Return the human-readable string value for the given tag.
   321             Return the human-readable string value for the given tag.
   274         """
   322         """
   275         
   323         
   299 
   347 
   300     # all Exif data offsets are relative to the beginning of this TIFF header
   348     # all Exif data offsets are relative to the beginning of this TIFF header
   301     offset = file.tell()
   349     offset = file.tell()
   302 
   350 
   303     # mmap the region for the EXIF data
   351     # mmap the region for the EXIF data
   304     buffer = mmap_region(file, length)
   352     buffer = mmap_buffer(file, length)
   305 
   353 
   306     # read byte-order header
   354     # read byte-order header
   307     byte_order = file.read(2)
   355     byte_order = file.read(2)
   308 
   356 
   309     # map to struct prefix
   357     # map to struct prefix
   347         # validate
   395         # validate
   348         if marker_byte != 0xff :
   396         if marker_byte != 0xff :
   349             raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))
   397             raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))
   350 
   398 
   351         # special cases for no data
   399         # special cases for no data
   352         if marker_byte in JPEG_NOSIZE_MARKERS :
   400         if marker_type in JPEG_NOSIZE_MARKERS :
   353             size = 0
   401             size = 0
   354 
   402 
   355         else :
   403         else :
   356             # read size field
   404             # read size field
   357             size, = read_struct(file, '!H')
   405             size, = read_struct(file, '!H')
   391                 raise Exception("JPEG file must start with 0xFF%02x marker" % (marker, ))
   439                 raise Exception("JPEG file must start with 0xFF%02x marker" % (marker, ))
   392 
   440 
   393         # look for APP1 marker (0xE1) with EXIF signature
   441         # look for APP1 marker (0xE1) with EXIF signature
   394         elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER:
   442         elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER:
   395             # skipped the initial Exif marker signature
   443             # skipped the initial Exif marker signature
   396             return size - JPEG_EXIF_HEADER
   444             return size - len(JPEG_EXIF_HEADER)
   397 
   445 
   398     # nothing
   446     # nothing
   399     return None
   447     return None
   400 
   448 
   401 def jpeg_load (file, **opts) :
   449 def jpeg_load (file, **opts) :
   407         
   455         
   408     # look for the right section
   456     # look for the right section
   409     size = jpeg_find_exif(file)
   457     size = jpeg_find_exif(file)
   410     
   458     
   411     # not found?
   459     # not found?
   412     if not res :
   460     if not size :
   413         # nothing
   461         # nothing
   414         return
   462         return
   415 
   463 
   416     else :    
   464     else :    
   417         # load it as TIFF data
   465         # load it as TIFF data
   448 def dump_exif (exif) :
   496 def dump_exif (exif) :
   449     """
   497     """
   450         Dump all tags from the given EXIF object to stdout
   498         Dump all tags from the given EXIF object to stdout
   451     """
   499     """
   452 
   500 
   453     print "EXIF offset=%d, size=%d:" % (exif.offset, exif.size)
   501     print "EXIF offset=%#08x, size=%d:" % (exif.offset, exif.size)
   454 
   502 
   455     for i, ifd in enumerate(exif.iter_ifds()) :
   503     for i, ifd in enumerate(exif.iter_ifds()) :
   456         print "\tIFD %d, offset=%d, size=%d, count=%d, next=%d:" % (i, ifd.offset, ifd.size, ifd.count, ifd.next_offset)
   504         print "\tIFD:%d offset=%#04x(%#08x), count=%d, next=%d:" % (
   457         
   505             i, 
   458         for i, tag in enumerate(exif.iter_tags()) :
   506             ifd.offset, ifd.offset + exif.offset,
   459             print "\t\tTag %d, offset=%d, tag=%d/%s, type=%d/%s, count=%d:" % (
   507             ifd.count, 
       
   508             ifd.next_offset
       
   509         )
       
   510         
       
   511         for i, tag in enumerate(ifd.iter_tags()) :
       
   512             data_info = exif.tag_data_info(tag)
       
   513 
       
   514             if data_info :
       
   515                 data_fmt, data_offset, data_size = data_info
       
   516 
       
   517             else :
       
   518                 data_fmt = data_offset = data_size = None
       
   519 
       
   520             print "\t\tTag:%d offset=%#04x(%#08x), tag=%d/%s, type=%d/%s, count=%d, fmt=%s, offset=%#04x, size=%s:" % (
   460                 i, 
   521                 i, 
   461                 tag.offset,
   522                 tag.offset, tag.offset + exif.offset,
   462                 tag.code, tag.name or '???',
   523                 tag.tag, tag.name or '???',
   463                 tag.type, tag.type_name if tag.type_data else '???',
   524                 tag.type, tag.type_name if tag.type_data else '???',
   464                 tag.count,
   525                 tag.count,
       
   526                 data_fmt, data_offset, data_size,
   465             )
   527             )
   466             
   528             
   467             for i, value in enumerate(exif.tag_values(tag)) :
   529             for i, value in enumerate(exif.tag_values(tag)) :
   468                 print "\t\t\t%02d: %s" % (i, tag.readable_value(value))
   530                 print "\t\t\t%02d: %r -> %s" % (i, value, tag.readable_value(value))
   469 
   531 
   470 def main (path) :
   532 def main (path) :
   471     """
   533     """
   472         Load and dump EXIF data from the given path
   534         Load and dump EXIF data from the given path
   473     """
   535     """