"""
A custom EXIF parsing module, aimed at high performance.
"""
import struct, mmap, os
def read_struct (file, fmt) :
    """
        Read one `fmt`-shaped record from the current position of `file` and return the unpacked tuple.

        Exactly struct.calcsize(fmt) bytes are requested from the file; if fewer come back, struct.unpack raises
        struct.error.
    """

    # pull precisely as many bytes as the format describes
    raw = file.read(struct.calcsize(fmt))

    # always a tuple, even for single-item formats
    return struct.unpack(fmt, raw)
class Buffer (object) :
    """
        Wraps a buffer object (anything that supports the python buffer protocol) for read-only access.

        Includes an offset for relative values, and an endianness for reading binary data.

        Attributes:
            buf     - the read-only view onto the underlying object
            offset  - offset of this view within the object it was created from (0 if not given)
            size    - the requested size, or the actual length of the view if no size was given
            prefix  - struct-module byte-order prefix prepended to every format
    """

    def __init__ (self, obj, offset=None, size=None, struct_prefix='=') :
        """
            Create a new Buffer object with a new underlying buffer, created from the given object, offset and size.

            `offset` defaults to the start of the object, and `size` defaults to everything from `offset` onwards.

            The endianness is given in the form of a struct-module prefix, which should be one of '<' or '>'.
            Standard size/alignment are assumed.
        """

        # the view constructors need real integers, so resolve the "not given" case first
        if offset is None :
            offset = 0

        # store
        self.buf = self._make_view(obj, offset, size)
        self.offset = offset
        self.size = len(self.buf) if size is None else size
        self.prefix = struct_prefix

    @staticmethod
    def _make_view (obj, offset, size) :
        """
            Build the read-only view for `obj`, starting at `offset` and covering `size` bytes (None = to the end).

            Uses the `buffer` builtin where it exists; where it does not (Python 3) a memoryview slice is used.
        """

        try :
            view_type = buffer

        except NameError :
            # no `buffer` builtin; slice a memoryview instead
            end = None if size is None else offset + size

            return memoryview(obj)[offset:end]

        # only pass the size when there is one, buffer() does not accept None
        if size is None :
            return view_type(obj, offset)

        return view_type(obj, offset, size)

    def subregion (self, offset, length=None) :
        """
            Create a new sub-Buffer referencing a view of this buffer, at the given offset, and with the given
            length, if any, and the same struct_prefix.
        """

        return Buffer(self.buf, offset, length, struct_prefix=self.prefix)

    def pread (self, offset, length) :
        """
            Read a random-access region of raw data, returned as a byte string.

            Like a slice, the result is silently shortened if the region runs past the end of the buffer.
        """

        # bytes() keeps the return type a plain byte string regardless of the kind of view in use
        return bytes(self.buf[offset:offset + length])

    def pread_struct (self, offset, fmt) :
        """
            Read structured data using the given struct format from the given offset.

            Returns the tuple of unpacked items; struct.error is raised if the buffer is too short.
        """

        return struct.unpack_from(self.prefix + fmt, self.buf, offset=offset)

    def pread_item (self, offset, fmt) :
        """
            Read a single item of structured data from the given offset.

            The format must describe exactly one item, otherwise the unpacking raises ValueError.
        """

        value, = self.pread_struct(offset, fmt)

        return value

    def iter_offsets (self, count, size, offset=0) :
        """
            Return an iterable series of offsets for `count` items of `size` bytes, beginning at `offset`.
        """

        # range is lazy where xrange does not exist, and a short list where it does
        return range(offset, offset + count * size, size)

    def item_size (self, fmt) :
        """
            Returns the size in bytes of the given item format
        """

        return struct.calcsize(self.prefix + fmt)

    def unpack_item (self, fmt, data) :
        """
            Unpacks a single item from the given data, which must be exactly as long as the format requires.
        """

        value, = struct.unpack(self.prefix + fmt, data)

        return value
def mmap_buffer (file, size) :
    """
        Map `size` bytes of the given open file into memory and return the read-only mmap object.

        The size is handed to mmap.mmap unchanged.
    """

    descriptor = file.fileno()
    region = mmap.mmap(descriptor, size, access=mmap.ACCESS_READ)

    return region
import exif_data
class Tag (object) :
    """
        Represents a single Tag in an IFD

        Attributes:
            offset, tag, type, count, value_ref - the values given to the constructor, stored unchanged
            type_data       - the exif_data.FIELD_TYPES entry for `type`, or None if the type is unknown
            type_format     - first item of type_data, None for unknown types
            type_name       - second item of type_data, None for unknown types
            tag_data        - the exif_data.EXIF_TAGS entry for `tag`, or None if the tag is unknown
            tag_name        - first item of tag_data, None for unknown tags
            tag_value_spec  - second item of tag_data if there is one, else None
    """

    def __init__ (self, offset, tag, type, count, value_ref) :
        """
            Build a Tag with the given binary items from the IFD entry
        """

        self.offset = offset
        self.tag = tag
        self.type = type
        self.count = count
        self.value_ref = value_ref

        # every optional attribute exists even when the lookups below come up empty
        self.type_format = None
        self.type_name = None
        self.tag_name = None
        self.tag_value_spec = None

        # lookup the type for this tag
        self.type_data = exif_data.FIELD_TYPES.get(type)

        # unpack it
        if self.type_data :
            self.type_format, self.type_name = self.type_data

        # lookup the tag data for this tag
        self.tag_data = exif_data.EXIF_TAGS.get(tag)

        # unpack it
        if self.tag_data :
            # the EXIF tag name
            self.tag_name = self.tag_data[0]

            # the optional value formatting specification
            if len(self.tag_data) > 1 :
                self.tag_value_spec = self.tag_data[1]

    @property
    def name (self) :
        """
            Lookup the name of this tag via its code, returns None if unknown.
        """

        if self.tag_data :
            return self.tag_name

        return None

    def readable_value (self, value) :
        """
            Convert the given value for this tag into a human-readable form.

            Values of tags with a value spec are mapped through exif_data.tag_value; any other value is returned
            as it is.
        """

        if self.tag_data and self.tag_value_spec :
            # map it
            return exif_data.tag_value(self.tag_value_spec, value)

        # nothing to map with
        return value
# size of a single IFD entry in bytes; this is the packed size of the 'HHII' layout (2 + 2 + 4 + 4) that
# IFD.iter_tags unpacks for every entry
IFD_ENTRY_SIZE = 12
class IFD (Buffer) :
    """
        Represents an IFD (Image file directory) region in EXIF data.

        Attributes (in addition to those of Buffer):
            count       - number of entries, read from the 16-bit field at the start of the region
            next_offset - the 32-bit field that directly follows the last entry
    """

    def __init__ (self, buffer, **buffer_opts) :
        """
            Access the IFD data from the given bufferable object with given buffer opts.

            This will read the `count` and `next_offset` values.
        """

        # init
        super(IFD, self).__init__(buffer, **buffer_opts)

        # read header
        self.count = self.pread_item(0, 'H')

        # read next-offset, which sits right behind the `count` entries
        self.next_offset = self.pread_item(0x02 + self.count * IFD_ENTRY_SIZE, 'I')

    def iter_tags (self) :
        """
            Iterate over all the Tag objects in this IFD.

            Each Tag's offset is given relative to the buffer this IFD was created from (this IFD's own offset
            plus the position of the entry inside it). EXIF.tag_values reads inline values at `tag.offset + 0x08`
            from that outer buffer, so an IFD-relative offset would make it read from the wrong place.
        """

        # where this IFD starts within its parent buffer
        base = self.offset or 0

        # read each tag
        for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02) :
            # read the tag data
            tag, type, count, value_ref = self.pread_struct(offset, 'HHII')

            # yield the new Tag
            yield Tag(base + offset, tag, type, count, value_ref)
class EXIF (Buffer) :
    """
        Represents the EXIF data embedded in some image file in the form of a Region.
    """

    def __init__ (self, buffer, tags=None, **buffer_opts) :
        """
            Access the EXIF data from the given bufferable object with the given buffer options.

            `tags`, if given, specifies that only the given named tags should be loaded.
            NOTE: `tags` is accepted but not used by anything in this class yet.
        """

        # init Buffer
        super(EXIF, self).__init__(buffer, **buffer_opts)

        # store
        self.buffer = buffer

    def iter_ifds (self) :
        """
            Iterate over all of the IFD objects in this EXIF.

            Every IFD is a view onto this buffer with the same byte order. The chain is followed until a zero
            next-offset, and also stops if an offset comes up a second time, so that a damaged file cannot make
            this loop forever.

            Raises Exception if an IFD offset points outside of the EXIF data.
        """

        total = len(self.buf)
        visited = set()

        # starting offset
        offset = self.pread_item(0x04, 'I')

        while offset and offset not in visited :
            visited.add(offset)

            if offset >= total :
                raise Exception("IFD offset %d is outside of the EXIF data (%d bytes)" % (offset, total))

            # create and read the IFD from our own view (an EXIF object itself is not bufferable)
            ifd = IFD(self.buf, offset=offset, size=total - offset, struct_prefix=self.prefix)

            # yield it
            yield ifd

            # skip to next offset
            offset = ifd.next_offset

    __iter__ = iter_ifds

    def tag_values (self, tag) :
        """
            Get the raw values for the given tag as a tuple.

            Values that fit into four bytes are read from the value field of the IFD entry itself, at
            `tag.offset + 0x08` within this buffer; larger values are read from the offset held in
            `tag.value_ref`.

            Returns None if the tag could not be recognized.
        """

        # unknown tag?
        if not tag.type_data :
            return None

        # size of the data
        data_size = tag.count * self.item_size(tag.type_format)

        # inline or external?
        if data_size > 0x04 :
            # value_ref was already unpacked as an integer, it is the offset of the external data
            offset = tag.value_ref

        else :
            # point at the inline data
            offset = tag.offset + 0x08

        # read values
        return self.pread_struct(offset, "%d%s" % (tag.count, tag.type_format))

    def tag_value (self, tag) :
        """
            Return the human-readable string value for the given tag.

            The values are formatted individually and joined with ", "; an unknown or empty tag gives "".
        """

        # load the raw values
        values = self.tag_values(tag)

        # unknown?
        if not values :
            return ""

        # readable values are not necessarily strings, so format each one before joining
        return ", ".join("%s" % (tag.readable_value(value), ) for value in values)
# mapping from two-byte TIFF byte order marker to struct prefix
# the keys are byte strings so that they compare equal to data read from a file opened in binary mode
TIFF_BYTE_ORDER = {
    b'II': '<',
    b'MM': '>',
}

# "An arbitrary but carefully chosen number (42) that further identifies the file as a TIFF file"
TIFF_BYTEORDER_MAGIC = 42
def tiff_load (file, length=0, **opts) :
    """
        Load the Exif/TIFF data from the given file at its current position with optional length.

        A `length` of 0 means "everything from the current position to the end of the file". The byte order
        found in the TIFF header is handed to the EXIF object as its struct prefix, unless the caller passes a
        `struct_prefix` of their own in `opts`.

        Returns the new EXIF object.

        Raises KeyError if the byte-order marker is not a known one, and Exception if the magic number that
        follows it is wrong.
    """

    # all Exif data offsets are relative to the beginning of this TIFF header
    offset = file.tell()

    # mmap from the start of the file up to the end of the EXIF data; a size of 0 maps the whole file
    region = mmap_buffer(file, offset + length if length else 0)

    # read byte-order header
    byte_order = file.read(2)

    # map to struct prefix
    struct_prefix = TIFF_BYTE_ORDER[byte_order]

    # validate
    check_value, = read_struct(file, struct_prefix + 'H')

    if check_value != TIFF_BYTEORDER_MAGIC :
        raise Exception("Invalid byte-order for TIFF: %r -> %d" % (byte_order, check_value))

    # without an explicit length, the data runs to the end of the mapping
    size = length if length else len(region) - offset

    # read everything using the byte order that the header announced
    opts.setdefault('struct_prefix', struct_prefix)

    # build and return the EXIF object with the correct offset/size from the mmap region
    return EXIF(region, offset=offset, size=size, **opts)
# the JPEG markers that don't have any data
JPEG_NOSIZE_MARKERS = (0xD8, 0xD9)

# the first marker in a JPEG File
JPEG_START_MARKER = 0xD8

# the JPEG APP1 marker used for EXIF
JPEG_EXIF_MARKER = 0xE1

# the JPEG APP1 Exif header
# a byte string, so that it compares equal to data read from a file opened in binary mode
JPEG_EXIF_HEADER = b"Exif\x00\x00"
def jpeg_markers (file) :
    """
        Iterate over the JPEG markers in the given file, yielding (type_byte, size) tuples.

        The size fields will be 0 for markers with no data. The file will be positioned at the beginning of the data
        region, and may be seek'd around if needed.

        Iteration ends after the end-of-image marker (0xD9) or the start-of-scan marker (0xDA) has been yielded;
        what follows a start-of-scan header is compressed image data, not a sequence of sized markers.

        Raises Exception for data that is not a marker or has an invalid size field, and struct.error if the file
        ends in the middle of a marker.

        XXX: find a real implementation of this somewhere?
    """

    while True :
        # read type
        marker_byte, marker_type = read_struct(file, '!BB')

        # validate
        if marker_byte != 0xff :
            raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))

        # special cases for no data; it is the type byte that identifies the marker
        if marker_type in JPEG_NOSIZE_MARKERS :
            size = 0

        else :
            # read size field
            size, = read_struct(file, '!H')

            # validate
            if size < 0x02 :
                raise Exception("Invalid size for marker %x%x: %x" % (marker_byte, marker_type, size))

            # do not count the size field itself
            size = size - 2

        # ok, data is at current position
        offset = file.tell()

        # yield
        yield marker_type, size

        # end of image, or start of the compressed scan data: no more markers to walk
        if marker_type in (0xD9, 0xDA) :
            return

        # absolute seek to next marker
        file.seek(offset + size)
def jpeg_find_exif (file) :
    """
        Find the Exif/TIFF section in the given JPEG file.

        If found, the file will be seek'd to the start of the Exif/TIFF header, and the size of the Exif/TIFF data
        (the APP1 segment without the leading Exif signature) will be returned.

        Returns None if no EXIF section was found.

        Raises Exception if the file does not begin with the JPEG start marker.
    """

    header_size = len(JPEG_EXIF_HEADER)

    for count, (marker, size) in enumerate(jpeg_markers(file)) :
        # verify that it's a JPEG file
        if count == 0 :
            # must start with the right marker
            if marker != JPEG_START_MARKER :
                raise Exception("JPEG file must start with 0xFF%02X marker, found 0xFF%02X" % (
                    JPEG_START_MARKER, marker
                ))

        # look for APP1 marker (0xE1) that is big enough for, and begins with, the EXIF signature
        elif marker == JPEG_EXIF_MARKER and size >= header_size and file.read(header_size) == JPEG_EXIF_HEADER :
            # the signature has been read past, so it is not part of the remaining data
            return size - header_size

    # nothing
    return None
def jpeg_load (file, **opts) :
    """
        Loads the embedded Exif TIFF data from the given JPEG file using tiff_load.

        Any `opts` are passed on to tiff_load.

        Returns None if no EXIF data could be found, or if the EXIF section is empty.
    """

    # look for the right section; this leaves the file positioned at the TIFF header
    size = jpeg_find_exif(file)

    # not found, or nothing in it?
    if not size :
        return None

    # load it as TIFF data
    return tiff_load(file, size, **opts)
def load_path (path, **opts) :
    """
        Loads an EXIF object from the given filesystem path.

        The loader is picked by the (case-insensitive) file extension, and is given any `opts`.

        Returns None if the extension is not recognized, or if the loader found no EXIF data. Errors raised by
        the loader while parsing are not caught here.
    """

    # file extension
    root, fext = os.path.splitext(path)

    # map
    func = {
        '.jpeg':    jpeg_load,
        '.jpg':     jpeg_load,
        '.tiff':    tiff_load,  # XXX: untested
    }.get(fext.lower())

    # not recognized?
    if not func :
        # XXX: sniff the file
        return None

    # open it, and close it again once loaded; the mmap made by the loader does not need the file object
    with open(path, 'rb') as file :
        # try and load it
        return func(file, **opts)
def dump_exif (exif) :
    """
        Dump all tags from the given EXIF object to stdout.

        Prints one line for the EXIF object, one per IFD, one per tag and one per tag value, indented by nesting
        level. Tags whose values cannot be read (exif.tag_values gives None) are listed without any values.
    """

    print("EXIF offset=%d, size=%d:" % (exif.offset, exif.size))

    for ifd_index, ifd in enumerate(exif.iter_ifds()) :
        print("\tIFD %d, offset=%d, size=%d, count=%d, next=%d:" % (
            ifd_index, ifd.offset, ifd.size, ifd.count, ifd.next_offset
        ))

        # the tags belong to the IFD, not to the EXIF object
        for tag_index, tag in enumerate(ifd.iter_tags()) :
            print("\t\tTag %d, offset=%d, tag=%d/%s, type=%d/%s, count=%d:" % (
                tag_index,
                tag.offset,
                tag.tag, tag.name or '???',
                tag.type, tag.type_name if tag.type_data else '???',
                tag.count,
            ))

            # tag_values gives None for tags of an unknown type
            for value_index, value in enumerate(exif.tag_values(tag) or ()) :
                print("\t\t\t%02d: %s" % (value_index, tag.readable_value(value)))
def main (path) :
    """
        Load and dump EXIF data from the given path.

        Prints the path, an empty line, and then the dump produced by dump_exif.

        Raises Exception if load_path comes back without an EXIF object.
    """

    # try and load it
    exif = load_path(path)

    if not exif :
        raise Exception("No EXIF data found")

    # dump it; single-argument print calls give the same output as a print statement or a print function
    print("%s: " % path)
    print("")

    dump_exif(exif)
if __name__ == '__main__' :
    import sys

    # the image path is the one and only argument; exit with a usage message rather than an IndexError
    if len(sys.argv) < 2 :
        sys.exit("usage: %s <image-path>" % sys.argv[0])

    main(sys.argv[1])