# Reconstructed from the changeset adding degal/exif.py (ef2c1ffdca8f, 2009-06-13).
"""
    A custom EXIF parsing module, aimed at high performance.

    The Exif/TIFF data is mmap'd and accessed through read-only `Buffer` views, so values are only
    unpacked when they are asked for.

    This module targets Python 2: it relies on the `buffer` and `xrange` builtins.
"""

import mmap
import os
import struct

import exif_data


def read_struct(file, fmt):
    """
        Read and unpack structured data from the given file at its current position.

        `fmt` is a complete struct-module format string, including any byte-order prefix.

        Returns the tuple of unpacked values. Raises struct.error if the file does not have enough
        data left for the format.
    """

    # number of bytes the format consumes
    fmt_size = struct.calcsize(fmt)

    # a short read makes struct.unpack raise struct.error
    return struct.unpack(fmt, file.read(fmt_size))


class Buffer(object):
    """
        Wraps a buffer object (anything that supports the python buffer protocol) for read-only access.

        Includes an offset for relative values, and an endianness for reading binary data.
    """

    def __init__(self, obj, offset=None, size=None, struct_prefix='='):
        """
            Create a new Buffer object with a new underlying buffer, created from the given object,
            offset and size.

            `offset` defaults to the start of `obj`, and `size` defaults to everything from `offset`
            up to the end of `obj`.

            The endianness is given in the form of a struct-module prefix, which should be one of
            '<' or '>'. Standard size/alignment are assumed.
        """

        # buffer() only accepts integers, so the None defaults have to be resolved by hand
        if offset is None:
            offset = 0

        if size is None:
            # open-ended view, up to the end of obj
            self.buf = buffer(obj, offset)

        else:
            self.buf = buffer(obj, offset, size)

        # store
        self.offset = offset
        self.size = len(self.buf)   # the real size of the view, also when no size was given
        self.prefix = struct_prefix

    def subregion(self, offset, length=None):
        """
            Create a new sub-Buffer referencing a view of this buffer, at the given offset, and with
            the given length, if any, and the same struct_prefix.
        """

        return Buffer(self.buf, offset, length, struct_prefix=self.prefix)

    def pread(self, offset, length):
        """
            Read a random-access region of raw data, `length` bytes starting at `offset`.
        """

        return self.buf[offset:offset + length]

    def pread_struct(self, offset, fmt):
        """
            Read structured data using the given struct format (without byte-order prefix) from the
            given offset.

            Returns the tuple of unpacked values.
        """

        return struct.unpack_from(self.prefix + fmt, self.buf, offset=offset)

    def pread_item(self, offset, fmt):
        """
            Read a single item of structured data from the given offset.

            `fmt` must describe exactly one value.
        """

        value, = self.pread_struct(offset, fmt)

        return value

    def iter_offsets(self, count, size, offset=0):
        """
            Yield a series of offsets for `count` items of `size` bytes, beginning at `offset`.
        """

        return xrange(offset, offset + count * size, size)

    def item_size(self, fmt):
        """
            Returns the size in bytes of the given item format.
        """

        return struct.calcsize(self.prefix + fmt)

    def unpack_item(self, fmt, data):
        """
            Unpacks a single item from the given raw data, using this buffer's byte order.
        """

        value, = struct.unpack(self.prefix + fmt, data)

        return value


def mmap_buffer(file, size):
    """
        Create and return a new read-only mmap'd region covering the first `size` bytes of the
        given file. A size of zero maps the whole file.
    """

    return mmap.mmap(file.fileno(), size, access=mmap.ACCESS_READ)


class Tag(object):
    """
        Represents a single Tag in an IFD.
    """

    def __init__(self, offset, tag, type, count, value_ref):
        """
            Build a Tag with the given binary items from the IFD entry.

            `offset` is the offset of the IFD entry itself, `tag` and `type` are the numeric tag and
            field type codes, `count` is the number of values, and `value_ref` is the entry's
            four-byte value/offset field unpacked as an unsigned integer.
        """

        self.offset = offset
        self.tag = tag
        self.type = type
        self.count = count
        self.value_ref = value_ref

        # make sure these always exist, also for unknown types/tags
        self.type_format = None
        self.type_name = None
        self.tag_name = None
        self.tag_value_spec = None

        # lookup the type for this tag
        self.type_data = exif_data.FIELD_TYPES.get(type)

        # unpack it
        if self.type_data:
            self.type_format, self.type_name = self.type_data

        # lookup the tag data for this tag
        self.tag_data = exif_data.EXIF_TAGS.get(tag)

        # unpack it
        if self.tag_data:
            # the EXIF tag name
            self.tag_name = self.tag_data[0]

            # the optional value formatting specification
            if len(self.tag_data) > 1:
                self.tag_value_spec = self.tag_data[1]

    @property
    def name(self):
        """
            Lookup the name of this tag via its code, returns None if unknown.
        """

        if self.tag_data:
            return self.tag_name

        else:
            return None

    def readable_value(self, value):
        """
            Convert the given value for this tag into a human-readable form.

            Returns the value itself when the tag is unknown or has no value specification.
        """

        if self.tag_data and self.tag_value_spec:
            # map it
            return exif_data.tag_value(self.tag_value_spec, value)

        else:
            # nope...
            return value


# size of an IFD entry in bytes
IFD_ENTRY_SIZE = 12


class IFD(Buffer):
    """
        Represents an IFD (Image file directory) region in EXIF data.
    """

    def __init__(self, buffer, **buffer_opts):
        """
            Access the IFD data from the given bufferable object with given buffer opts.

            This will read the `count` and `next_offset` values.
        """

        # init
        super(IFD, self).__init__(buffer, **buffer_opts)

        # read header: the number of entries
        self.count = self.pread_item(0, 'H')

        # read next-offset, which follows directly after the last entry
        self.next_offset = self.pread_item(0x02 + self.count * IFD_ENTRY_SIZE, 'I')

    def iter_tags(self):
        """
            Iterate over all the Tag objects in this IFD.

            Each Tag's offset is relative to the object this IFD was created from (i.e. this IFD's
            own offset is included), so that it can be used directly for reads on the parent buffer.
        """

        # read each tag
        for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02):
            # read the tag data
            tag, type_code, count, value_ref = self.pread_struct(offset, 'HHII')

            # yield the new Tag, translating the IFD-relative entry offset for the parent buffer
            yield Tag(self.offset + offset, tag, type_code, count, value_ref)


class EXIF(Buffer):
    """
        Represents the EXIF data embedded in some image file in the form of a Region.

        Offset zero of this buffer is the start of the TIFF header.
    """

    def __init__(self, buffer, tags=None, **buffer_opts):
        """
            Access the EXIF data from the given bufferable object with the given buffer options.

            `tags` is accepted for selecting which named tags should be loaded, but it is not acted
            upon here; all tags are available.
        """

        # init Buffer
        super(EXIF, self).__init__(buffer, **buffer_opts)

        # store
        self.buffer = buffer

    def iter_ifds(self):
        """
            Iterate over all of the IFD objects in this EXIF.

            Iteration ends at a zero next-offset, or when an offset repeats.
        """

        # starting offset
        offset = self.pread_item(0x04, 'I')

        # offsets already visited, so that a cyclic chain in a broken file cannot loop forever
        seen = set()

        while offset and offset not in seen:
            seen.add(offset)

            # create and read the IFD as a view of our own region, using the same byte order
            ifd = IFD(self.buf, offset=offset, struct_prefix=self.prefix)

            # yield it
            yield ifd

            # skip to next offset
            offset = ifd.next_offset

    __iter__ = iter_ifds

    def tag_values(self, tag):
        """
            Get the raw values for the given tag as a tuple.

            The tag's offset must be relative to this EXIF buffer, as for the tags of the IFDs
            yielded by iter_ifds.

            Returns None if the tag's field type could not be recognized.
        """

        # unknown type?
        if not tag.type_data:
            return None

        # size of the data
        data_size = tag.count * self.item_size(tag.type_format)

        # inline or external?
        if data_size > 0x04:
            # the value field is the offset of the external data
            offset = tag.value_ref

        else:
            # the data is stored inside the value field itself, eight bytes into the entry
            offset = tag.offset + 0x08

        # read values
        return self.pread_struct(offset, "%d%s" % (tag.count, tag.type_format))

    def tag_value(self, tag):
        """
            Return the human-readable string value for the given tag.

            Returns an empty string if the tag has no values or its type is unknown.
        """

        # load the raw values
        values = self.tag_values(tag)

        # unknown?
        if not values:
            return ""

        # return as comma-separated formatted string; the values are not necessarily strings
        return ", ".join(str(tag.readable_value(value)) for value in values)


# mapping from two-byte TIFF byte order marker to struct prefix
TIFF_BYTE_ORDER = {
    'II': '<',
    'MM': '>',
}

# "An arbitrary but carefully chosen number (42) that further identifies the file as a TIFF file"
TIFF_BYTEORDER_MAGIC = 42


def tiff_load(file, length=0, **opts):
    """
        Load the Exif/TIFF data from the given file at its current position with optional length.

        A length of zero means that the data extends to the end of the file. Any further keyword
        arguments are passed on to EXIF.

        Returns the new EXIF object. Raises KeyError if the byte-order marker is not recognized,
        and Exception if the magic number following it is wrong.
    """

    # all Exif data offsets are relative to the beginning of this TIFF header
    offset = file.tell()

    # read byte-order header
    byte_order = file.read(2)

    # map to struct prefix
    struct_prefix = TIFF_BYTE_ORDER[byte_order]

    # validate
    check_value, = read_struct(file, struct_prefix + 'H')

    if check_value != TIFF_BYTEORDER_MAGIC:
        raise Exception("Invalid byte-order for TIFF: %s -> %d" % (byte_order, check_value))

    # mmap the region for the EXIF data; the mapping starts at the beginning of the file, so it
    # has to reach up to the end of the region (zero maps the whole file)
    if length:
        region = mmap_buffer(file, offset + length)

    else:
        region = mmap_buffer(file, 0)

    # the values must be unpacked using the byte order declared by the header
    opts.setdefault('struct_prefix', struct_prefix)

    # build and return the EXIF object with the correct offset/size from the mmap region
    return EXIF(region, offset=offset, size=length or None, **opts)


# the JPEG markers that don't have any data
JPEG_NOSIZE_MARKERS = (0xD8, 0xD9)

# the first marker in a JPEG File
JPEG_START_MARKER = 0xD8

# the markers that end the sequence of marker segments: EOI ends the image, and SOS is followed
# by entropy-coded data that is not covered by its size field
JPEG_STOP_MARKERS = (0xD9, 0xDA)

# the JPEG APP1 marker used for EXIF
JPEG_EXIF_MARKER = 0xE1

# the JPEG APP1 Exif header
JPEG_EXIF_HEADER = "Exif\x00\x00"


def jpeg_markers(file):
    """
        Iterate over the JPEG markers in the given file, yielding (type_byte, size) tuples.

        The size fields will be 0 for markers with no data. The file will be positioned at the
        beginning of the data region, and may be seek'd around if needed.

        Iteration ends after the SOS or EOI marker, or at the end of the file. Raises Exception
        if something other than a marker is found, or if a size field is invalid.

        XXX: find a real implementation of this somewhere?
    """

    while True:
        # read type
        header = file.read(2)

        if not header:
            # clean end of file
            return

        # a partial marker makes this raise struct.error
        marker_byte, marker_type = struct.unpack('!BB', header)

        # validate
        if marker_byte != 0xff:
            raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))

        # special cases for no data
        if marker_type in JPEG_NOSIZE_MARKERS:
            size = 0

        else:
            # read size field
            size, = read_struct(file, '!H')

            # validate
            if size < 0x02:
                raise Exception("Invalid size for marker %x%x: %x" % (marker_byte, marker_type, size))

            # do not count the size field itself
            size = size - 2

        # ok, data is at current position
        offset = file.tell()

        # yield
        yield marker_type, size

        # what follows these is not a marker segment
        if marker_type in JPEG_STOP_MARKERS:
            return

        # absolute seek to next marker, no matter where the consumer left the file
        file.seek(offset + size)


def jpeg_find_exif(file):
    """
        Find the Exif/TIFF section in the given JPEG file.

        If found, the file will be seek'd to the start of the Exif/TIFF header, and the size of the
        Exif/TIFF data will be returned.

        Returns None if no EXIF section was found. Raises Exception if the file does not start with
        the JPEG start marker.
    """

    for count, (marker, size) in enumerate(jpeg_markers(file)):
        # verify that it's a JPEG file
        if count == 0:
            # must start with the right marker
            if marker != JPEG_START_MARKER:
                raise Exception("JPEG file must start with 0xFF%02x marker" % (marker, ))

        # look for APP1 marker (0xE1) with EXIF signature
        elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER:
            # skipped the initial Exif marker signature
            return size - len(JPEG_EXIF_HEADER)

    # nothing
    return None


def jpeg_load(file, **opts):
    """
        Loads the embedded Exif TIFF data from the given JPEG file using tiff_load.

        Returns None if no EXIF data could be found.
    """

    # look for the right section
    size = jpeg_find_exif(file)

    # not found, or nothing in it?
    if not size:
        # nothing
        return None

    # load it as TIFF data
    return tiff_load(file, size, **opts)


def load_path(path, **opts):
    """
        Loads an EXIF object from the given filesystem path.

        The loader is chosen by file extension. Returns None if the extension is not recognized, or
        if the file does not contain any EXIF data. Errors from parsing the file are raised.
    """

    # file extension
    root, fext = os.path.splitext(path)

    # map
    func = {
        '.jpeg': jpeg_load,
        '.jpg': jpeg_load,
        '.tiff': tiff_load,     # XXX: untested
    }.get(fext.lower())

    # not recognized?
    if not func:
        # XXX: sniff the file
        return None

    # open it
    file = open(path, 'rb')

    try:
        # try and load it
        return func(file, **opts)

    finally:
        # the memory mapping stays valid after the file object it was created from is closed
        file.close()


def dump_exif(exif):
    """
        Dump all tags from the given EXIF object to stdout.
    """

    print("EXIF offset=%d, size=%d:" % (exif.offset, exif.size))

    for ifd_index, ifd in enumerate(exif.iter_ifds()):
        print("\tIFD %d, offset=%d, size=%d, count=%d, next=%d:" % (
            ifd_index, ifd.offset, ifd.size, ifd.count, ifd.next_offset
        ))

        for tag_index, tag in enumerate(ifd.iter_tags()):
            print("\t\tTag %d, offset=%d, tag=%d/%s, type=%d/%s, count=%d:" % (
                tag_index,
                tag.offset,
                tag.tag, tag.name or '???',
                tag.type, tag.type_name if tag.type_data else '???',
                tag.count,
            ))

            # tag_values returns None for unknown field types
            for value_index, value in enumerate(exif.tag_values(tag) or ()):
                print("\t\t\t%02d: %s" % (value_index, tag.readable_value(value)))


def main(path):
    """
        Load and dump EXIF data from the given path.

        Raises Exception if no EXIF data could be loaded.
    """

    # try and load it
    exif = load_path(path)

    if not exif:
        raise Exception("No EXIF data found")

    # dump it
    print("%s: " % path)
    print("")

    dump_exif(exif)


if __name__ == '__main__':
    from sys import argv

    main(argv[1])