diff -r 2e2ef5c99985 -r f74d8cf678ce degal/exif.py --- a/degal/exif.py Sun Jun 14 16:09:04 2009 +0300 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,669 +0,0 @@ -""" - A custom EXIF parsing module, aimed at high performance. -""" - -import struct, mmap, os - -from utils import lazy_load, lazy_load_iter - -def read_struct (file, fmt) : - """ - Utility function to read data from the a file using struct - """ - - # length of data - fmt_size = struct.calcsize(fmt) - - # get data - file_data = file.read(fmt_size) - - # unpack single item, this should raise an error if file_data is too short - return struct.unpack(fmt, file_data) - -class Buffer (object) : - """ - Wraps a buffer object (anything that supports the python buffer protocol) for read-only access. - - Includes an offset for relative values, and an endianess for reading binary data. - """ - - def __init__ (self, obj, offset=None, size=None, struct_prefix='=') : - """ - Create a new Buffer object with a new underlying buffer, created from the given object, offset and size. - - The endiannes is given in the form of a struct-module prefix, which should be one of '<' or '>'. - Standard size/alignment are assumed. - """ - - # store - self.buf = buffer(obj, *(arg for arg in (offset, size) if arg is not None)) - self.offset = offset - self.size = size - self.prefix = struct_prefix - - def subregion (self, offset, length=None) : - """ - Create a new sub-Buffer referencing a view of this buffer, at the given offset, and with the given - length, if any, and the same struct_prefix. - """ - - return Buffer(self.buf, offset, length, struct_prefix=self.prefix) - - def pread (self, offset, length) : - """ - Read a random-access region of raw data - """ - - return self.buf[offset:offset + length] - - def pread_struct (self, offset, fmt) : - """ - Read structured data using the given struct format from the given offset. - """ - - return struct.unpack_from(self.prefix + fmt, self.buf, offset=offset) - - def pread_item (self, offset, fmt) : - """ - Read a single item of structured data from the given offset. - """ - - value, = self.pread_struct(offset, fmt) - - return value - - def iter_offsets (self, count, size, offset=0) : - """ - Yield a series of offsets for `count` items of `size` bytes, beginning at `offset`. - """ - - return xrange(offset, offset + count * size, size) - - def item_size (self, fmt) : - """ - Returns the size in bytes of the given item format - """ - - return struct.calcsize(self.prefix + fmt) - - def unpack_item (self, fmt, data) : - """ - Unpacks a single item from the given data - """ - - value, = struct.unpack(self.prefix + fmt, data) - - return value - -def mmap_buffer (file, size) : - """ - Create and return a new read-only mmap'd region - """ - - return mmap.mmap(file.fileno(), size, access=mmap.ACCESS_READ) - -import exif_data - -class Tag (object) : - """ - Represents a single Tag in an IFD - """ - - def __init__ (self, ifd, offset, tag, type, count, data_raw) : - """ - Build a Tag with the given binary items from the IFD entry - """ - - self.ifd = ifd - self.offset = offset - self.tag = tag - self.type = type - self.count = count - self.data_raw = data_raw - - # lookup the type for this tag - self.type_data = exif_data.FIELD_TYPES.get(type) - - # unpack it - if self.type_data : - self.type_format, self.type_name, self.type_func = self.type_data - - # lookup the tag data for this tag - self.tag_data = self.ifd.tag_dict.get(tag) - - @property - def name (self) : - """ - Lookup the name of this tag via its code, returns None if unknown. - """ - - if self.tag_data : - return self.tag_data.name - - else : - return None - - def is_subifd (self) : - """ - Tests if this Tag is of a IFDTag type - """ - - return self.tag_data and isinstance(self.tag_data, exif_data.IFDTag) - - @lazy_load - def subifd (self) : - """ - Load the sub-IFD for this tag - """ - - # the tag_dict to use - tag_dict = self.tag_data.ifd_tags or self.ifd.tag_dict - - # construct, return - return self.ifd.exif._load_subifd(self, tag_dict) - - def process_values (self, raw_values) : - """ - Process the given raw values unpacked from the file. - """ - - if self.type_data and self.type_func : - # use the filter func - return self.type_func(raw_values) - - else : - # nada, just leave them - return raw_values - - def readable_value (self, values) : - """ - Convert the given values for this tag into a human-readable string. - - Returns the comma-separated values by default. - """ - - if self.tag_data : - # map it - return self.tag_data.map_values(values) - - else : - # default value-mapping - return ", ".join(str(value) for value in values) - -# size of an IFD entry in bytes -IFD_ENTRY_SIZE = 12 - -class IFD (Buffer) : - """ - Represents an IFD (Image file directory) region in EXIF data. - """ - - def __init__ (self, exif, buffer, tag_dict, **buffer_opts) : - """ - Access the IFD data from the given bufferable object with given buffer opts. - - This will read the `count` and `next_offset` values. - """ - - # init - super(IFD, self).__init__(buffer, **buffer_opts) - - # store - self.exif = exif - self.tag_dict = tag_dict - - # read header - self.count = self.pread_item(0, 'H') - - # read next-offset - self.next_offset = self.pread_item(0x02 + self.count * IFD_ENTRY_SIZE, 'I') - - @lazy_load_iter - def tags (self) : - """ - Iterate over all the Tag objects in this IFD - """ - - # read each tag - for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02) : - # read the tag data - tag, type, count, data_raw = self.pread_struct(offset, 'HHI4s') - - # yield the new Tag - yield Tag(self, self.offset + offset, tag, type, count, data_raw) - - def get_tags (self, filter=None) : - """ - Yield a series of tag objects for this IFD and all sub-IFDs. - """ - - for tag in self.tags : - if tag.is_subifd() : - # recurse - for subtag in tag.subifd.get_tags(filter=filter) : - yield subtag - - else : - # normal tag - yield tag - -class EXIF (Buffer) : - """ - Represents the EXIF data embedded in some image file in the form of a Region. - """ - - def __init__ (self, buffer, **buffer_opts) : - """ - Access the EXIF data from the given bufferable object with the given buffer options. - """ - - # init Buffer - super(EXIF, self).__init__(buffer, **buffer_opts) - - # store - self.buffer = buffer - - @lazy_load_iter - def ifds (self) : - """ - Iterate over the primary IFDs in this EXIF. - """ - - # starting offset - offset = self.pread_item(0x04, 'I') - - while offset : - # create and read the IFD, operating on the right sub-buffer - ifd = IFD(self, self.buf, exif_data.EXIF_TAGS, offset=offset) - - # yield it - yield ifd - - # skip to next offset - offset = ifd.next_offset - - def _load_subifd (self, tag, tag_dict) : - """ - Creates and returns a sub-IFD for the given tag. - """ - - # locate it - offset, = self.tag_values_raw(tag) - - # construct the new IFD - return IFD(self, self.buf, tag_dict, offset=offset) - - def tag_data_info (self, tag) : - """ - Calculate the location, format and size of the given tag's data. - - Returns a (fmt, offset, size) tuple. - """ - # unknown tag? - if not tag.type_data : - return None - - # data format - if len(tag.type_format) == 1 : - # let struct handle the count - fmt = "%d%s" % (tag.count, tag.type_format) - - else : - # handle the count ourselves - fmt = tag.type_format * tag.count - - # size of the data - size = self.item_size(fmt) - - # inline or external? - if size > 0x04 : - # point at the external data - offset = self.unpack_item('I', tag.data_raw) - - else : - # point at the inline data - offset = tag.offset + 0x08 - - return fmt, offset, size - - def tag_values_raw (self, tag) : - """ - Get the raw values for the given tag as a tuple. - - Returns None if the tag could not be recognized. - """ - - # find the data - data_info = self.tag_data_info(tag) - - # not found? - if not data_info : - return None - - # unpack - data_fmt, data_offset, data_size = data_info - - # read values - return self.pread_struct(data_offset, data_fmt) - - def tag_values (self, tag) : - """ - Gets the processed values for the given tag as a list. - """ - - # read + process - return tag.process_values(self.tag_values_raw(tag)) - - def tag_value (self, tag) : - """ - Return the human-readable string value for the given tag. - """ - - # load the raw values - values = self.tag_values(tag) - - # unknown? - if not values : - return "" - - # return as comma-separated formatted string, yes - return tag.readable_value(values) - - def get_main_tags (self, **opts) : - """ - Get the tags for the main image's IFD as a dict. - """ - - if not self.ifds : - # weird case - raise Exception("No IFD for main image found") - - # the main IFD is always the first one - main_ifd = self.ifds[0] - - # do it - return dict((tag.name, self.tag_value(tag)) for tag in main_ifd.get_tags(**opts)) - -# mapping from two-byte TIFF byte order marker to struct prefix -TIFF_BYTE_ORDER = { - 'II': '<', - 'MM': '>', -} - -# "An arbitrary but carefully chosen number (42) that further identifies the file as a TIFF file" -TIFF_BYTEORDER_MAGIC = 42 - -def tiff_load (file, length=0, **opts) : - """ - Load the Exif/TIFF data from the given file at its current position with optional length, using exif_load. - """ - - # all Exif data offsets are relative to the beginning of this TIFF header - offset = file.tell() - - # mmap the region for the EXIF data - buffer = mmap_buffer(file, length) - - # read byte-order header - byte_order = file.read(2) - - # map to struct prefix - struct_prefix = TIFF_BYTE_ORDER[byte_order] - - # validate - check_value, = read_struct(file, struct_prefix + 'H') - - if check_value != TIFF_BYTEORDER_MAGIC : - raise Exception("Invalid byte-order for TIFF: %2c -> %d" % (byte_order, check_value)) - - # build and return the EXIF object with the correct offset/size from the mmap region - return EXIF(buffer, offset=offset, size=length, **opts) - -# the JPEG markers that don't have any data -JPEG_NOSIZE_MARKERS = (0xD8, 0xD9) - -# the first marker in a JPEG File -JPEG_START_MARKER = 0xD8 - -# the JPEG APP1 marker used for EXIF -JPEG_EXIF_MARKER = 0xE1 - -# the JPEG APP1 Exif header -JPEG_EXIF_HEADER = "Exif\x00\x00" - -def jpeg_markers (file) : - """ - Iterate over the JPEG markers in the given file, yielding (type_byte, size) tuples. - - The size fields will be 0 for markers with no data. The file will be positioned at the beginning of the data - region, and may be seek'd around if needed. - - XXX: find a real implementation of this somewhere? - """ - - while True : - # read type - marker_byte, marker_type = read_struct(file, '!BB') - - # validate - if marker_byte != 0xff : - raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type)) - - # special cases for no data - if marker_type in JPEG_NOSIZE_MARKERS : - size = 0 - - else : - # read size field - size, = read_struct(file, '!H') - - # validate - if size < 0x02 : - raise Exception("Invalid size for marker %x%x: %x" % (marker_byte, marker_type, size)) - - else : - # do not count the size field itself - size = size - 2 - - # ok, data is at current position - offset = file.tell() - - # yield - yield marker_type, size - - # absolute seek to next marker - file.seek(offset + size) - -def jpeg_find_exif (file) : - """ - Find the Exif/TIFF section in the given JPEG file. - - If found, the file will be seek'd to the start of the Exif/TIFF header, and the size of the Exif/TIFF data will - be returned. - - Returns None if no EXIF section was found. - """ - - for count, (marker, size) in enumerate(jpeg_markers(file)) : - # verify that it's a JPEG file - if count == 0 : - # must start with the right marker - if marker != JPEG_START_MARKER : - raise Exception("JPEG file must start with 0xFF%02x marker" % (marker, )) - - # look for APP1 marker (0xE1) with EXIF signature - elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER: - # skipped the initial Exif marker signature - return size - len(JPEG_EXIF_HEADER) - - # nothing - return None - -def jpeg_load (file, **opts) : - """ - Loads the embedded Exif TIFF data from the given JPEG file using tiff_load. - - Returns None if no EXIF data could be found. - """ - - # look for the right section - size = jpeg_find_exif(file) - - # not found? - if not size : - # nothing - return - - else : - # load it as TIFF data - return tiff_load(file, size, **opts) - -def load_path (path, **opts) : - """ - Loads an EXIF object from the given filesystem path. - - Returns None if it could not be parsed. - """ - - # file extension - root, fext = os.path.splitext(path) - - # map - func = { - '.jpeg': jpeg_load, - '.jpg': jpeg_load, - '.tiff': tiff_load, # XXX: untested - }.get(fext.lower()) - - # not recognized? - if not func : - # XXX: sniff the file - return None - - # open it - file = open(path, 'rb') - - # try and load it - return func(file, **opts) - -def dump_tag (exif, i, tag, indent=2) : - """ - Dump the given tag - """ - - data_info = exif.tag_data_info(tag) - - if data_info : - data_fmt, data_offset, data_size = data_info - - else : - data_fmt = data_offset = data_size = None - - print "%sTag:%d offset=%#04x(%#08x), tag=%d/%s, type=%d/%s, count=%d, fmt=%s, offset=%#04x, size=%s, is_subifd=%s:" % ( - '\t'*indent, - i, - tag.offset, tag.offset + exif.offset, - tag.tag, tag.name or '???', - tag.type, tag.type_name if tag.type_data else '???', - tag.count, - data_fmt, data_offset, data_size, - tag.is_subifd(), - ) - - if tag.is_subifd() : - # recurse - dump_ifd(exif, 0, tag.subifd, indent + 1) - - else : - # dump each value - values = exif.tag_values(tag) - - for i, value in enumerate(values) : - print "%s\t%02d: %.120r" % ('\t'*indent, i, value) - - # and then the readable one - print "%s\t-> %.120s" % ('\t'*indent, tag.readable_value(values), ) - - -def dump_ifd (exif, i, ifd, indent=1) : - """ - Dump the given IFD, recursively - """ - - print "%sIFD:%d offset=%#04x(%#08x), count=%d, next=%d:" % ( - '\t'*indent, - i, - ifd.offset, ifd.offset + exif.offset, - ifd.count, - ifd.next_offset - ) - - for i, tag in enumerate(ifd.tags) : - # dump - dump_tag(exif, i, tag, indent + 1) - - -def dump_exif (exif) : - """ - Dump all tags from the given EXIF object to stdout - """ - - print "EXIF offset=%#08x, size=%d:" % (exif.offset, exif.size) - - for i, ifd in enumerate(exif.ifds) : - # dump - dump_ifd(exif, i, ifd) - - -def list_tags (exif) : - """ - Print a neat listing of tags to stdout - """ - - for k, v in exif.get_main_tags().iteritems() : - print "%30s: %s" % (k, v) - -def main_path (path, dump) : - # dump path - print "%s: " % path - - # try and load it - exif = load_path(path) - - if not exif : - raise Exception("No EXIF data found") - - if dump : - # dump everything - dump_exif(exif) - - else : - # list them - list_tags(exif) - - -def main (paths, dump=False) : - """ - Load and dump EXIF data from the given path - """ - - # handle each one - for path in paths : - main_path(path, dump=dump) - -if __name__ == '__main__' : - import getopt - from sys import argv - - # defaults - dump = False - - # parse args - opts, args = getopt.getopt(argv[1:], "d", ["dump"]) - - for opt, val in opts : - if opt in ('-d', "--dump") : - dump = True - - main(args, dump=dump) -