degal/exif.py
branchnew-exif
changeset 102 ef2c1ffdca8f
child 103 63e89dc2d6f1
equal deleted inserted replaced
101:698dc68a985d 102:ef2c1ffdca8f
       
     1 """
       
     2     A custom EXIF parsing module, aimed at high performance.
       
     3 """
       
     4 
       
     5 import struct, mmap, os
       
     6 
       
     7 def read_struct (file, fmt) :
       
     8     """
       
     9         Utility function to read data from the a file using struct
       
    10     """
       
    11     
       
    12     # length of data
       
    13     fmt_size = struct.calcsize(fmt)
       
    14     
       
    15     # get data
       
    16     file_data = file.read(fmt_size)
       
    17     
       
    18     # unpack single item, this should raise an error if file_data is too short
       
    19     return struct.unpack(fmt, file_data)
       
    20 
       
    21 class Buffer (object) :
       
    22     """
       
    23         Wraps a buffer object (anything that supports the python buffer protocol) for read-only access.
       
    24         
       
    25         Includes an offset for relative values, and an endianess for reading binary data.
       
    26     """
       
    27     
       
    28     def __init__ (self, obj, offset=None, size=None, struct_prefix='=') :
       
    29         """
       
    30             Create a new Buffer object with a new underlying buffer, created from the given object, offset and size.
       
    31             
       
    32             The endiannes is given in the form of a struct-module prefix, which should be one of '<' or '>'.
       
    33             Standard size/alignment are assumed.
       
    34         """
       
    35 
       
    36         # store
       
    37         self.buf = buffer(obj, offset, size)
       
    38         self.offset = offset
       
    39         self.size = size
       
    40         self.prefix = struct_prefix
       
    41     
       
    42     def subregion (self, offset, length=None) :
       
    43         """
       
    44             Create a new sub-Buffer referencing a view of this buffer, at the given offset, and with the given
       
    45             length, if any, and the same struct_prefix.
       
    46         """
       
    47 
       
    48         return Buffer(self.buf, offset, length, struct_prefix=self.prefix)
       
    49     
       
    50     def pread (self, offset, length) :
       
    51         """
       
    52             Read a random-access region of raw data
       
    53         """
       
    54 
       
    55         return self.buf[offset:offset + length]
       
    56     
       
    57     def pread_struct (self, offset, fmt) :
       
    58         """
       
    59             Read structured data using the given struct format from the given offset.
       
    60         """
       
    61 
       
    62         return struct.unpack_from(self.prefix + fmt, self.buf, offset=offset)
       
    63 
       
    64     def pread_item (self, offset, fmt) :
       
    65         """
       
    66             Read a single item of structured data from the given offset.
       
    67         """
       
    68 
       
    69         value, = self.pread_struct(offset, fmt)
       
    70 
       
    71         return value
       
    72 
       
    73     def iter_offsets (self, count, size, offset=0) :
       
    74         """
       
    75             Yield a series of offsets for `count` items of `size` bytes, beginning at `offset`.
       
    76         """
       
    77 
       
    78         return xrange(offset, offset + count * size, size)
       
    79     
       
    80     def item_size (self, fmt) :
       
    81         """
       
    82             Returns the size in bytes of the given item format
       
    83         """
       
    84 
       
    85         return struct.calcsize(self.prefix + fmt)
       
    86 
       
    87     def unpack_item (self, fmt, data) :
       
    88         """
       
    89             Unpacks a single item from the given data
       
    90         """
       
    91 
       
    92         value, = struct.unpack(self.prefix + fmt, data)
       
    93         
       
    94         return value
       
    95 
       
    96 def mmap_buffer (file, size) :
       
    97     """
       
    98         Create and return a new read-only mmap'd region
       
    99     """
       
   100 
       
   101     return mmap.mmap(file.fileno(), size, access=mmap.ACCESS_READ)
       
   102 
       
   103 import exif_data
       
   104 
       
   105 class Tag (object) :
       
   106     """
       
   107         Represents a single Tag in an IFD
       
   108     """
       
   109 
       
   110     def __init__ (self, offset, tag, type, count, value_ref) :
       
   111         """
       
   112             Build a Tag with the given binary items from the IFD entry
       
   113         """
       
   114         
       
   115         self.offset = offset
       
   116         self.tag = tag
       
   117         self.type = type
       
   118         self.count = count
       
   119         self.value_ref = value_ref
       
   120         
       
   121         # lookup the type for this tag
       
   122         self.type_data = exif_data.FIELD_TYPES.get(type)
       
   123 
       
   124         # unpack it
       
   125         if self.type_data :
       
   126             self.type_format, self.type_name = self.type_data
       
   127     
       
   128         # lookup the tag data for this tag
       
   129         self.tag_data = exif_data.EXIF_TAGS.get(tag)
       
   130         
       
   131         # unpack it
       
   132         if self.tag_data :
       
   133             # the EXIF tag name
       
   134             self.tag_name = tag_data[0]
       
   135             
       
   136             # the optional value formatting specification
       
   137             if len(self.tag_data) > 1 :
       
   138                 self.tag_value_spec = self.tag_data[1]
       
   139 
       
   140             else :
       
   141                 self.tag_value_spec = None
       
   142 
       
   143     @property
       
   144     def name (self) :
       
   145         """
       
   146             Lookup the name of this tag via its code, returns None if unknown.
       
   147         """
       
   148 
       
   149         if self.tag_data :
       
   150             return self.tag_name
       
   151 
       
   152         else :
       
   153             return None
       
   154     
       
   155     def readable_value (self, value) :
       
   156         """
       
   157             Convert the given value for this tag into a human-readable string.
       
   158 
       
   159             Returns the value itself by default.
       
   160         """
       
   161 
       
   162         if self.tag_data and self.tag_value_spec :
       
   163             # map it
       
   164             return exif_data.tag_value(self.tag_value_spec, value)
       
   165 
       
   166         else :
       
   167             # nope...
       
   168             return value
       
   169 
       
   170 # size of an IFD entry in bytes
       
   171 IFD_ENTRY_SIZE = 12
       
   172 
       
   173 class IFD (Buffer) :
       
   174     """
       
   175         Represents an IFD (Image file directory) region in EXIF data.
       
   176     """
       
   177 
       
   178     def __init__ (self, buffer, **buffer_opts) :
       
   179         """
       
   180             Access the IFD data from the given bufferable object with given buffer opts.
       
   181 
       
   182             This will read the `count` and `next_offset` values.
       
   183         """
       
   184 
       
   185         # init
       
   186         super(IFD, self).__init__(buffer, **buffer_opts)
       
   187         
       
   188         # read header
       
   189         self.count = self.pread_item(0, 'H')
       
   190 
       
   191         # read next-offset
       
   192         self.next_offset = self.pread_item(0x02 + self.count * IFD_ENTRY_SIZE, 'I')
       
   193     
       
   194     def iter_tags (self) :
       
   195         """
       
   196             Iterate over all the Tag objects in this IFD
       
   197         """
       
   198         
       
   199         # read each tag
       
   200         for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02) :
       
   201             # read the tag data
       
   202             tag, type, count, value_ref = self.pread_struct(offset, 'HHII')
       
   203             
       
   204             # yield the new Tag
       
   205             yield Tag(offset, tag, type, count, value_ref)
       
   206 
       
   207 class EXIF (Buffer) :
       
   208     """
       
   209         Represents the EXIF data embedded in some image file in the form of a Region.
       
   210     """
       
   211 
       
   212     def __init__ (self, buffer, tags=None, **buffer_opts) :
       
   213         """
       
   214             Access the EXIF data from the given bufferable object with the given buffer options.
       
   215 
       
   216             `tags`, if given, specifies that only the given named tags should be loaded.
       
   217         """
       
   218 
       
   219         # init Buffer
       
   220         super(EXIF, self).__init__(buffer, **buffer_opts)
       
   221 
       
   222         # store
       
   223         self.buffer = buffer
       
   224     
       
   225     def iter_ifds (self) :
       
   226         """
       
   227             Iterate over all of the IFD objects in this EXIF.
       
   228         """
       
   229 
       
   230         # starting offset
       
   231         offset = self.pread_item(0x04, 'I')
       
   232 
       
   233         while offset :
       
   234             # create and read the IFD
       
   235             ifd = IFD(self, offset=offset)
       
   236 
       
   237             # yield it
       
   238             yield ifd
       
   239 
       
   240             # skip to next offset
       
   241             offset = ifd.next_offset
       
   242     
       
   243     __iter__ = iter_ifds
       
   244     
       
   245     def tag_values (self, tag) :
       
   246         """
       
   247             Get the raw values for the given tag as a tuple.
       
   248 
       
   249             Returns None if the tag could not be recognized.
       
   250         """
       
   251 
       
   252         # unknown tag?
       
   253         if not tag.type_data :
       
   254             return None
       
   255 
       
   256         # size of the data
       
   257         data_size = tag.count * self.item_size(tag.type_format)
       
   258 
       
   259         # inline or external?
       
   260         if data_size > 0x04 :
       
   261             # point at the external data
       
   262             offset = self.unpack_item('I', tag.value_ref)
       
   263 
       
   264         else :
       
   265             # point at the inline data
       
   266             offset = tag.offset + 0x08
       
   267         
       
   268         # read values
       
   269         return self.pread_struct(offset, "%d%s" % (tag.count, tag.type_format))
       
   270     
       
   271     def tag_value (self, tag) :
       
   272         """
       
   273             Return the human-readable string value for the given tag.
       
   274         """
       
   275         
       
   276         # load the raw values
       
   277         values = self.tag_values(tag)
       
   278 
       
   279         # unknown?
       
   280         if not values :
       
   281             return ""
       
   282 
       
   283         # return as comma-separated formatted string, yes
       
   284         return ", ".join(tag.readable_value(value) for value in values)
       
   285 
       
   286 # mapping from two-byte TIFF byte order marker to struct prefix
       
   287 TIFF_BYTE_ORDER = {
       
   288     'II': '<',
       
   289     'MM': '>',
       
   290 }
       
   291 
       
   292 # "An arbitrary but carefully chosen number (42) that further identifies the file as a TIFF file"
       
   293 TIFF_BYTEORDER_MAGIC = 42 
       
   294 
       
   295 def tiff_load (file, length=0, **opts) :
       
   296     """
       
   297         Load the Exif/TIFF data from the given file at its current position with optional length, using exif_load.
       
   298     """
       
   299 
       
   300     # all Exif data offsets are relative to the beginning of this TIFF header
       
   301     offset = file.tell()
       
   302 
       
   303     # mmap the region for the EXIF data
       
   304     buffer = mmap_region(file, length)
       
   305 
       
   306     # read byte-order header
       
   307     byte_order = file.read(2)
       
   308 
       
   309     # map to struct prefix
       
   310     struct_prefix = TIFF_BYTE_ORDER[byte_order]
       
   311 
       
   312     # validate
       
   313     check_value, = read_struct(file, struct_prefix + 'H')
       
   314 
       
   315     if check_value != TIFF_BYTEORDER_MAGIC  :
       
   316         raise Exception("Invalid byte-order for TIFF: %2c -> %d" % (byte_order, check_value))
       
   317 
       
   318     # build and return the EXIF object with the correct offset/size from the mmap region
       
   319     return EXIF(buffer, offset=offset, size=length, **opts)
       
   320 
       
   321 # the JPEG markers that don't have any data
       
   322 JPEG_NOSIZE_MARKERS = (0xD8, 0xD9)
       
   323 
       
   324 # the first marker in a JPEG File
       
   325 JPEG_START_MARKER = 0xD8
       
   326 
       
   327 # the JPEG APP1 marker used for EXIF
       
   328 JPEG_EXIF_MARKER = 0xE1
       
   329 
       
   330 # the JPEG APP1 Exif header
       
   331 JPEG_EXIF_HEADER = "Exif\x00\x00"
       
   332 
       
   333 def jpeg_markers (file) :
       
   334     """
       
   335         Iterate over the JPEG markers in the given file, yielding (type_byte, size) tuples.
       
   336 
       
   337         The size fields will be 0 for markers with no data. The file will be positioned at the beginning of the data
       
   338         region, and may be seek'd around if needed.
       
   339         
       
   340         XXX: find a real implementation of this somewhere?
       
   341     """
       
   342 
       
   343     while True :
       
   344         # read type
       
   345         marker_byte, marker_type = read_struct(file, '!BB')
       
   346         
       
   347         # validate
       
   348         if marker_byte != 0xff :
       
   349             raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))
       
   350 
       
   351         # special cases for no data
       
   352         if marker_byte in JPEG_NOSIZE_MARKERS :
       
   353             size = 0
       
   354 
       
   355         else :
       
   356             # read size field
       
   357             size, = read_struct(file, '!H')
       
   358             
       
   359             # validate
       
   360             if size < 0x02 :
       
   361                 raise Exception("Invalid size for marker %x%x: %x" % (marker_byte, marker_type, size))
       
   362             
       
   363             else :
       
   364                 # do not count the size field itself
       
   365                 size = size - 2
       
   366             
       
   367         # ok, data is at current position
       
   368         offset = file.tell()
       
   369         
       
   370         # yield
       
   371         yield marker_type, size
       
   372 
       
   373         # absolute seek to next marker
       
   374         file.seek(offset + size)
       
   375 
       
   376 def jpeg_find_exif (file) :
       
   377     """
       
   378         Find the Exif/TIFF section in the given JPEG file.
       
   379 
       
   380         If found, the file will be seek'd to the start of the Exif/TIFF header, and the size of the Exif/TIFF data will
       
   381         be returned.
       
   382         
       
   383         Returns None if no EXIF section was found.
       
   384     """
       
   385 
       
   386     for count, (marker, size) in enumerate(jpeg_markers(file)) :
       
   387         # verify that it's a JPEG file
       
   388         if count == 0 :
       
   389             # must start with the right marker
       
   390             if marker != JPEG_START_MARKER :
       
   391                 raise Exception("JPEG file must start with 0xFF%02x marker" % (marker, ))
       
   392 
       
   393         # look for APP1 marker (0xE1) with EXIF signature
       
   394         elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER:
       
   395             # skipped the initial Exif marker signature
       
   396             return size - JPEG_EXIF_HEADER
       
   397 
       
   398     # nothing
       
   399     return None
       
   400 
       
   401 def jpeg_load (file, **opts) :
       
   402     """
       
   403         Loads the embedded Exif TIFF data from the given JPEG file using tiff_load.
       
   404 
       
   405         Returns None if no EXIF data could be found.
       
   406     """
       
   407         
       
   408     # look for the right section
       
   409     size = jpeg_find_exif(file)
       
   410     
       
   411     # not found?
       
   412     if not res :
       
   413         # nothing
       
   414         return
       
   415 
       
   416     else :    
       
   417         # load it as TIFF data
       
   418         return tiff_load(file, size, **opts)
       
   419 
       
   420 def load_path (path, **opts) :
       
   421     """
       
   422         Loads an EXIF object from the given filesystem path.
       
   423 
       
   424         Returns None if it could not be parsed.
       
   425     """
       
   426     
       
   427     # file extension
       
   428     root, fext = os.path.splitext(path)
       
   429 
       
   430     # map
       
   431     func = {
       
   432         '.jpeg':    jpeg_load,
       
   433         '.jpg':     jpeg_load,
       
   434         '.tiff':    tiff_load,  # XXX: untested
       
   435     }.get(fext.lower())
       
   436     
       
   437     # not recognized?
       
   438     if not func :
       
   439         # XXX: sniff the file
       
   440         return None
       
   441 
       
   442     # open it
       
   443     file = open(path, 'rb')
       
   444 
       
   445     # try and load it
       
   446     return func(file, **opts)
       
   447 
       
   448 def dump_exif (exif) :
       
   449     """
       
   450         Dump all tags from the given EXIF object to stdout
       
   451     """
       
   452 
       
   453     print "EXIF offset=%d, size=%d:" % (exif.offset, exif.size)
       
   454 
       
   455     for i, ifd in enumerate(exif.iter_ifds()) :
       
   456         print "\tIFD %d, offset=%d, size=%d, count=%d, next=%d:" % (i, ifd.offset, ifd.size, ifd.count, ifd.next_offset)
       
   457         
       
   458         for i, tag in enumerate(exif.iter_tags()) :
       
   459             print "\t\tTag %d, offset=%d, tag=%d/%s, type=%d/%s, count=%d:" % (
       
   460                 i, 
       
   461                 tag.offset,
       
   462                 tag.code, tag.name or '???',
       
   463                 tag.type, tag.type_name if tag.type_data else '???',
       
   464                 tag.count,
       
   465             )
       
   466             
       
   467             for i, value in enumerate(exif.tag_values(tag)) :
       
   468                 print "\t\t\t%02d: %s" % (i, tag.readable_value(value))
       
   469 
       
   470 def main (path) :
       
   471     """
       
   472         Load and dump EXIF data from the given path
       
   473     """
       
   474     
       
   475     # try and load it
       
   476     exif = load_path(path)
       
   477 
       
   478     if not exif :
       
   479         raise Exception("No EXIF data found")
       
   480     
       
   481     # dump it
       
   482     print "%s: " % path
       
   483     print
       
   484 
       
   485     dump_exif(exif)
       
   486 
       
   487 if __name__ == '__main__' :
       
   488     from sys import argv
       
   489 
       
   490     main(argv[1])
       
   491