"""
    A custom EXIF parsing module, aimed at high performance.

    NOTE: this module targets Python 2; it relies on the `buffer` and `xrange` builtins.
"""

import mmap
import os
import struct

import exif_data


def read_struct(file, fmt):
    """
        Read and unpack one struct of format `fmt` from the current position of `file`.

        Returns the tuple produced by struct.unpack. Raises struct.error if the file yields fewer bytes than the
        format requires.
    """

    # read exactly as many bytes as the format describes; a short read makes unpack() fail
    return struct.unpack(fmt, file.read(struct.calcsize(fmt)))


class Buffer(object):
    """
        Wraps a buffer object (anything that supports the python buffer protocol) for read-only access.

        Includes an offset for relative values, and an endianess for reading binary data.
    """

    def __init__(self, obj, offset=None, size=None, struct_prefix='='):
        """
            Create a new Buffer object with a new underlying buffer, created from the given object, offset and size.

            The endianness is given in the form of a struct-module prefix, which should be one of '<' or '>'.
            Standard size/alignment are assumed.
        """

        # buffer() takes offset and size positionally, so a size given without an offset must not be allowed to
        # slide into the offset slot
        if size is not None:
            self.buf = buffer(obj, offset or 0, size)

        elif offset is not None:
            self.buf = buffer(obj, offset)

        else:
            self.buf = buffer(obj)

        # store the values as given by the caller (may be None)
        self.offset = offset
        self.size = size
        self.prefix = struct_prefix

    def subregion(self, offset, length=None):
        """
            Create a new sub-Buffer referencing a view of this buffer, at the given offset, and with the given
            length, if any, and the same struct_prefix.
        """

        return Buffer(self.buf, offset, length, struct_prefix=self.prefix)

    def pread(self, offset, length):
        """
            Read a random-access region of raw data: `length` bytes starting at `offset`.
        """

        return self.buf[offset:offset + length]

    def pread_struct(self, offset, fmt):
        """
            Read structured data using the given struct format (without byte-order prefix) from the given offset.

            Returns the tuple of unpacked values.
        """

        return struct.unpack_from(self.prefix + fmt, self.buf, offset=offset)

    def pread_item(self, offset, fmt):
        """
            Read a single item of structured data from the given offset.

            `fmt` must describe exactly one value.
        """

        value, = self.pread_struct(offset, fmt)

        return value

    def iter_offsets(self, count, size, offset=0):
        """
            Yield a series of offsets for `count` items of `size` bytes, beginning at `offset`.
        """

        return xrange(offset, offset + count * size, size)

    def item_size(self, fmt):
        """
            Returns the size in bytes of the given item format, using this buffer's byte-order prefix.
        """

        return struct.calcsize(self.prefix + fmt)

    def unpack_item(self, fmt, data):
        """
            Unpacks a single item from the given data, using this buffer's byte-order prefix.
        """

        value, = struct.unpack(self.prefix + fmt, data)

        return value


def mmap_buffer(file, size):
    """
        Create and return a new read-only mmap'd region covering the first `size` bytes of the given file.
    """

    return mmap.mmap(file.fileno(), size, access=mmap.ACCESS_READ)


class Tag(object):
    """
        Represents a single Tag in an IFD.
    """

    def __init__(self, offset, tag, type, count, data_raw):
        """
            Build a Tag with the given binary items from the IFD entry.

            `offset` is the position of the entry itself; EXIF.tag_data_info reads inline data at `offset + 8`
            within the EXIF buffer, so it must be relative to that buffer.
        """

        self.offset = offset
        self.tag = tag
        self.type = type
        self.count = count
        self.data_raw = data_raw

        # defaults, so that the attributes exist even for unknown type/tag codes
        self.type_format = self.type_name = self.type_func = None
        self.tag_name = self.tag_value_spec = None

        # lookup the type for this tag; entries unpack as (format, name, func)
        self.type_data = exif_data.FIELD_TYPES.get(type)

        if self.type_data:
            self.type_format, self.type_name, self.type_func = self.type_data

        # lookup the tag data for this tag; entries unpack as (name, value_spec)
        self.tag_data = exif_data.EXIF_TAGS.get(tag)

        if self.tag_data:
            self.tag_name, self.tag_value_spec = self.tag_data

    @property
    def name(self):
        """
            Lookup the name of this tag via its code, returns None if unknown.
        """

        if self.tag_data:
            return self.tag_name

        return None

    def process_values(self, raw_values):
        """
            Process the given raw values unpacked from the file.

            Applies the type's filter function when the type is known and has one; otherwise the raw values are
            returned untouched.
        """

        if self.type_data and self.type_func:
            # use the filter func
            return self.type_func(raw_values)

        # nada, just leave them
        return raw_values

    def readable_value(self, values):
        """
            Convert the given values for this tag into a human-readable string.

            Delegates to exif_data.map_values, passing this tag's value spec, or None for unknown tags.
        """

        # fall back to the default mapping for unknown tags
        spec = self.tag_value_spec if self.tag_data else None

        return exif_data.map_values(spec, values)


# size of an IFD entry in bytes
IFD_ENTRY_SIZE = 12


class IFD(Buffer):
    """
        Represents an IFD (Image file directory) region in EXIF data.
    """

    def __init__(self, buffer, **buffer_opts):
        """
            Access the IFD data from the given bufferable object with given buffer opts.

            This will read the `count` and `next_offset` values.
        """

        super(IFD, self).__init__(buffer, **buffer_opts)

        # the IFD starts with a two-byte entry count
        self.count = self.pread_item(0, 'H')

        # ...followed by the entries, and finally the four-byte offset of the next IFD
        self.next_offset = self.pread_item(0x02 + self.count * IFD_ENTRY_SIZE, 'I')

    def iter_tags(self):
        """
            Iterate over all the Tag objects in this IFD.

            The yielded tags carry offsets relative to the buffer this IFD was cut from (i.e. including this
            IFD's own offset), which is what EXIF.tag_data_info needs to locate inline values.
        """

        # offset of this IFD within its parent buffer
        base = self.offset or 0

        for entry_offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02):
            # entry layout: tag code, type code, value count, 4 bytes of inline data / data offset
            tag_code, type_code, count, data_raw = self.pread_struct(entry_offset, 'HHI4s')

            yield Tag(base + entry_offset, tag_code, type_code, count, data_raw)


class EXIF(Buffer):
    """
        Represents the EXIF data embedded in some image file in the form of a Region.
    """

    def __init__(self, buffer, tags=None, **buffer_opts):
        """
            Access the EXIF data from the given bufferable object with the given buffer options.

            `tags`, if given, specifies that only the given named tags should be loaded.
            XXX: `tags` is accepted but not used yet.
        """

        super(EXIF, self).__init__(buffer, **buffer_opts)

        # keep a reference to the underlying object
        self.buffer = buffer

    def iter_ifds(self):
        """
            Iterate over the primary IFDs in this EXIF.

            Follows the chain of next-IFD offsets starting from the offset stored at 0x04, stopping at a zero
            offset or when an offset repeats (which would otherwise loop forever on corrupt data).
        """

        # starting offset
        offset = self.pread_item(0x04, 'I')

        visited = set()

        while offset and offset not in visited:
            visited.add(offset)

            # create and read the IFD on the right sub-buffer, using the same byte order as this EXIF
            ifd = IFD(self.buf, offset=offset, struct_prefix=self.prefix)

            yield ifd

            # skip to next offset
            offset = ifd.next_offset

    def iter_all_ifds(self):
        """
            Iterate over all of the IFDs contained within this EXIF, or within other IFDs.

            XXX: not implemented yet, currently returns None.
        """

    __iter__ = iter_ifds

    def tag_data_info(self, tag):
        """
            Calculate the location, format and size of the given tag's data.

            Returns a (fmt, offset, size) tuple, or None if the tag's type is unknown.
        """

        # unknown type?
        if not tag.type_data:
            return None

        if len(tag.type_format) == 1:
            # let struct handle the count
            fmt = "%d%s" % (tag.count, tag.type_format)

        else:
            # multi-item formats need to be repeated by hand
            fmt = tag.type_format * tag.count

        # size of the data
        size = self.item_size(fmt)

        if size > 0x04:
            # too large to fit inline; the entry's data field holds the offset of the external data
            offset = self.unpack_item('I', tag.data_raw)

        else:
            # the values are stored inline, in the last four bytes of the entry
            offset = tag.offset + 0x08

        return fmt, offset, size

    def tag_values_raw(self, tag):
        """
            Get the raw values for the given tag as a tuple.

            Returns None if the tag could not be recognized.
        """

        data_info = self.tag_data_info(tag)

        # not found?
        if not data_info:
            return None

        data_fmt, data_offset, _data_size = data_info

        # read values
        return self.pread_struct(data_offset, data_fmt)

    def tag_values(self, tag):
        """
            Gets the processed values for the given tag: the raw values passed through Tag.process_values.
        """

        return tag.process_values(self.tag_values_raw(tag))

    def tag_value(self, tag):
        """
            Return the human-readable string value for the given tag, or "" if there are no values.
        """

        values = self.tag_values(tag)

        # unknown?
        if not values:
            return ""

        return tag.readable_value(values)


# mapping from two-byte TIFF byte order marker to struct prefix
TIFF_BYTE_ORDER = {
    'II': '<',
    'MM': '>',
}

# "An arbitrary but carefully chosen number (42) that further identifies the file as a TIFF file"
TIFF_BYTEORDER_MAGIC = 42


def tiff_load(file, length=0, **opts):
    """
        Load the Exif/TIFF data from the given file at its current position with optional length.

        A `length` of 0 means "up to the end of the file". Returns an EXIF object using the byte order announced
        by the TIFF header, unless the caller passes an explicit `struct_prefix`.

        Raises KeyError for an unknown byte-order marker, and Exception if the magic number does not match.
    """

    # all Exif data offsets are relative to the beginning of this TIFF header
    offset = file.tell()

    if length:
        # the mapping starts at the beginning of the file, so it must reach up to the end of the Exif data;
        # clamp to the file size, since mmap refuses to map past the end of a (truncated) file
        map_size = min(offset + length, os.fstat(file.fileno()).st_size)

    else:
        # a zero size maps the whole file
        map_size = 0

    # mmap the region for the EXIF data
    region = mmap_buffer(file, map_size)

    if not length:
        # everything from the header up to the end of the file
        length = len(region) - offset

    # read byte-order header
    byte_order = file.read(2)

    # map to struct prefix
    struct_prefix = TIFF_BYTE_ORDER[byte_order]

    # validate
    check_value, = read_struct(file, struct_prefix + 'H')

    if check_value != TIFF_BYTEORDER_MAGIC:
        raise Exception("Invalid byte-order for TIFF: %r -> %d" % (byte_order, check_value))

    # parse using the byte order we just detected, unless the caller insists on something else
    opts.setdefault('struct_prefix', struct_prefix)

    # build and return the EXIF object with the correct offset/size from the mmap region
    return EXIF(region, offset=offset, size=length, **opts)


# the JPEG markers that don't have any data
JPEG_NOSIZE_MARKERS = (0xD8, 0xD9)

# the first marker in a JPEG File
JPEG_START_MARKER = 0xD8

# the JPEG markers after which no further header segments follow (start-of-scan, end-of-image)
JPEG_FINAL_MARKERS = (0xDA, 0xD9)

# the JPEG APP1 marker used for EXIF
JPEG_EXIF_MARKER = 0xE1

# the JPEG APP1 Exif header
JPEG_EXIF_HEADER = "Exif\x00\x00"


def jpeg_markers(file):
    """
        Iterate over the JPEG markers in the given file, yielding (type_byte, size) tuples.

        The size fields will be 0 for markers with no data. The file will be positioned at the beginning of the data
        region, and may be seek'd around if needed.

        Iteration ends at the end of the file, or after a start-of-scan/end-of-image marker has been yielded,
        since what follows those is not a sequence of markers.

        Raises Exception on data that is not a valid marker.

        XXX: find a real implementation of this somewhere?
    """

    while True:
        header = file.read(2)

        if len(header) < 2:
            # end of file, no more markers
            return

        marker_byte, marker_type = struct.unpack('!BB', header)

        # validate
        if marker_byte != 0xff:
            raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))

        if marker_type in JPEG_NOSIZE_MARKERS:
            # special cases for no data
            size = 0

        else:
            # read size field
            size, = read_struct(file, '!H')

            if size < 0x02:
                raise Exception("Invalid size for marker %x%x: %x" % (marker_byte, marker_type, size))

            # do not count the size field itself
            size -= 2

        # ok, data is at current position; remember it, as the consumer may move the file around
        data_offset = file.tell()

        yield marker_type, size

        # absolute seek to next marker
        file.seek(data_offset + size)

        if marker_type in JPEG_FINAL_MARKERS:
            return


def jpeg_find_exif(file):
    """
        Find the Exif/TIFF section in the given JPEG file.

        If found, the file will be seek'd to the start of the Exif/TIFF header, and the size of the Exif/TIFF data will
        be returned.

        Returns None if no EXIF section was found. Raises Exception if the file does not start with the JPEG
        start marker.
    """

    for index, (marker, size) in enumerate(jpeg_markers(file)):
        if index == 0:
            # verify that it's a JPEG file: must start with the right marker
            if marker != JPEG_START_MARKER:
                raise Exception("JPEG file must start with 0xFF%02x marker" % (marker, ))

        # look for APP1 marker (0xE1) with EXIF signature
        elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER:
            # the signature was just consumed, so it is not part of the Exif/TIFF data
            return size - len(JPEG_EXIF_HEADER)

    # nothing
    return None


def jpeg_load(file, **opts):
    """
        Loads the embedded Exif TIFF data from the given JPEG file using tiff_load.

        Returns None if no EXIF data could be found.
    """

    # look for the right section
    size = jpeg_find_exif(file)

    # not found (or empty)?
    if not size:
        return None

    # load it as TIFF data
    return tiff_load(file, size, **opts)


def load_path(path, **opts):
    """
        Loads an EXIF object from the given filesystem path.

        The loader is chosen by file extension. Returns None if the extension is not recognized.
    """

    # file extension
    _root, fext = os.path.splitext(path)

    # map
    loader = {
        '.jpeg': jpeg_load,
        '.jpg': jpeg_load,
        '.tiff': tiff_load,  # XXX: untested
    }.get(fext.lower())

    # not recognized?
    if not loader:
        # XXX: sniff the file
        return None

    fh = open(path, 'rb')

    try:
        # try and load it
        return loader(fh, **opts)

    finally:
        # the mmap'd region keeps its own handle on the file, so ours can be released right away
        fh.close()


def dump_exif(exif):
    """
        Dump all tags from the given EXIF object to stdout.
    """

    # the EXIF object may have been built without an explicit offset/size
    base = exif.offset or 0
    size = exif.size if exif.size is not None else len(exif.buf)

    print("EXIF offset=%#08x, size=%d:" % (base, size))

    for ifd_index, ifd in enumerate(exif.iter_ifds()):
        print("\tIFD:%d offset=%#04x(%#08x), count=%d, next=%d:" % (
            ifd_index,
            ifd.offset, ifd.offset + base,
            ifd.count,
            ifd.next_offset
        ))

        for tag_index, tag in enumerate(ifd.iter_tags()):
            data_info = exif.tag_data_info(tag)

            if data_info:
                data_fmt, data_offset, data_size = data_info
                data_offset = "%#04x" % data_offset

            else:
                # unknown type, nothing can be said about the data
                data_fmt = data_offset = data_size = '???'

            print("\t\tTag:%d offset=%#04x(%#08x), tag=%d/%s, type=%d/%s, count=%d, fmt=%s, offset=%s, size=%s:" % (
                tag_index,
                tag.offset, tag.offset + base,
                tag.tag, tag.name or '???',
                tag.type, tag.type_name if tag.type_data else '???',
                tag.count,
                data_fmt, data_offset, data_size,
            ))

            values = exif.tag_values(tag)

            if values is None:
                # the values of unknown types cannot be decoded
                continue

            for value_index, value in enumerate(values):
                print("\t\t\t%02d: %r" % (value_index, value))

            print("\t\t\t-> %s" % (tag.readable_value(values), ))


def main(path, quiet=False):
    """
        Load and dump EXIF data from the given path.

        With `quiet`, the data is only loaded, not printed. Raises Exception if no EXIF data could be loaded.
    """

    # try and load it
    exif = load_path(path)

    if not exif:
        raise Exception("No EXIF data found")

    if not quiet:
        # dump it
        print("%s: " % path)
        print("")

        dump_exif(exif)


if __name__ == '__main__':
    from sys import argv

    main(argv[1], '-q' in argv)