--- a/degal/exif.py Sun Jun 14 16:09:04 2009 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,669 +0,0 @@
-"""
- A custom EXIF parsing module, aimed at high performance.
-"""
-
-import struct, mmap, os
-
-from utils import lazy_load, lazy_load_iter
-
-def read_struct (file, fmt) :
- """
- Utility function to read data from the a file using struct
- """
-
- # length of data
- fmt_size = struct.calcsize(fmt)
-
- # get data
- file_data = file.read(fmt_size)
-
- # unpack single item, this should raise an error if file_data is too short
- return struct.unpack(fmt, file_data)
-
-class Buffer (object) :
- """
- Wraps a buffer object (anything that supports the python buffer protocol) for read-only access.
-
- Includes an offset for relative values, and an endianess for reading binary data.
- """
-
- def __init__ (self, obj, offset=None, size=None, struct_prefix='=') :
- """
- Create a new Buffer object with a new underlying buffer, created from the given object, offset and size.
-
- The endiannes is given in the form of a struct-module prefix, which should be one of '<' or '>'.
- Standard size/alignment are assumed.
- """
-
- # store
- self.buf = buffer(obj, *(arg for arg in (offset, size) if arg is not None))
- self.offset = offset
- self.size = size
- self.prefix = struct_prefix
-
- def subregion (self, offset, length=None) :
- """
- Create a new sub-Buffer referencing a view of this buffer, at the given offset, and with the given
- length, if any, and the same struct_prefix.
- """
-
- return Buffer(self.buf, offset, length, struct_prefix=self.prefix)
-
- def pread (self, offset, length) :
- """
- Read a random-access region of raw data
- """
-
- return self.buf[offset:offset + length]
-
- def pread_struct (self, offset, fmt) :
- """
- Read structured data using the given struct format from the given offset.
- """
-
- return struct.unpack_from(self.prefix + fmt, self.buf, offset=offset)
-
- def pread_item (self, offset, fmt) :
- """
- Read a single item of structured data from the given offset.
- """
-
- value, = self.pread_struct(offset, fmt)
-
- return value
-
- def iter_offsets (self, count, size, offset=0) :
- """
- Yield a series of offsets for `count` items of `size` bytes, beginning at `offset`.
- """
-
- return xrange(offset, offset + count * size, size)
-
- def item_size (self, fmt) :
- """
- Returns the size in bytes of the given item format
- """
-
- return struct.calcsize(self.prefix + fmt)
-
- def unpack_item (self, fmt, data) :
- """
- Unpacks a single item from the given data
- """
-
- value, = struct.unpack(self.prefix + fmt, data)
-
- return value
-
-def mmap_buffer (file, size) :
- """
- Create and return a new read-only mmap'd region
- """
-
- return mmap.mmap(file.fileno(), size, access=mmap.ACCESS_READ)
-
-import exif_data
-
-class Tag (object) :
- """
- Represents a single Tag in an IFD
- """
-
- def __init__ (self, ifd, offset, tag, type, count, data_raw) :
- """
- Build a Tag with the given binary items from the IFD entry
- """
-
- self.ifd = ifd
- self.offset = offset
- self.tag = tag
- self.type = type
- self.count = count
- self.data_raw = data_raw
-
- # lookup the type for this tag
- self.type_data = exif_data.FIELD_TYPES.get(type)
-
- # unpack it
- if self.type_data :
- self.type_format, self.type_name, self.type_func = self.type_data
-
- # lookup the tag data for this tag
- self.tag_data = self.ifd.tag_dict.get(tag)
-
- @property
- def name (self) :
- """
- Lookup the name of this tag via its code, returns None if unknown.
- """
-
- if self.tag_data :
- return self.tag_data.name
-
- else :
- return None
-
- def is_subifd (self) :
- """
- Tests if this Tag is of a IFDTag type
- """
-
- return self.tag_data and isinstance(self.tag_data, exif_data.IFDTag)
-
- @lazy_load
- def subifd (self) :
- """
- Load the sub-IFD for this tag
- """
-
- # the tag_dict to use
- tag_dict = self.tag_data.ifd_tags or self.ifd.tag_dict
-
- # construct, return
- return self.ifd.exif._load_subifd(self, tag_dict)
-
- def process_values (self, raw_values) :
- """
- Process the given raw values unpacked from the file.
- """
-
- if self.type_data and self.type_func :
- # use the filter func
- return self.type_func(raw_values)
-
- else :
- # nada, just leave them
- return raw_values
-
- def readable_value (self, values) :
- """
- Convert the given values for this tag into a human-readable string.
-
- Returns the comma-separated values by default.
- """
-
- if self.tag_data :
- # map it
- return self.tag_data.map_values(values)
-
- else :
- # default value-mapping
- return ", ".join(str(value) for value in values)
-
-# size of an IFD entry in bytes
-IFD_ENTRY_SIZE = 12
-
-class IFD (Buffer) :
- """
- Represents an IFD (Image file directory) region in EXIF data.
- """
-
- def __init__ (self, exif, buffer, tag_dict, **buffer_opts) :
- """
- Access the IFD data from the given bufferable object with given buffer opts.
-
- This will read the `count` and `next_offset` values.
- """
-
- # init
- super(IFD, self).__init__(buffer, **buffer_opts)
-
- # store
- self.exif = exif
- self.tag_dict = tag_dict
-
- # read header
- self.count = self.pread_item(0, 'H')
-
- # read next-offset
- self.next_offset = self.pread_item(0x02 + self.count * IFD_ENTRY_SIZE, 'I')
-
- @lazy_load_iter
- def tags (self) :
- """
- Iterate over all the Tag objects in this IFD
- """
-
- # read each tag
- for offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02) :
- # read the tag data
- tag, type, count, data_raw = self.pread_struct(offset, 'HHI4s')
-
- # yield the new Tag
- yield Tag(self, self.offset + offset, tag, type, count, data_raw)
-
- def get_tags (self, filter=None) :
- """
- Yield a series of tag objects for this IFD and all sub-IFDs.
- """
-
- for tag in self.tags :
- if tag.is_subifd() :
- # recurse
- for subtag in tag.subifd.get_tags(filter=filter) :
- yield subtag
-
- else :
- # normal tag
- yield tag
-
-class EXIF (Buffer) :
- """
- Represents the EXIF data embedded in some image file in the form of a Region.
- """
-
- def __init__ (self, buffer, **buffer_opts) :
- """
- Access the EXIF data from the given bufferable object with the given buffer options.
- """
-
- # init Buffer
- super(EXIF, self).__init__(buffer, **buffer_opts)
-
- # store
- self.buffer = buffer
-
- @lazy_load_iter
- def ifds (self) :
- """
- Iterate over the primary IFDs in this EXIF.
- """
-
- # starting offset
- offset = self.pread_item(0x04, 'I')
-
- while offset :
- # create and read the IFD, operating on the right sub-buffer
- ifd = IFD(self, self.buf, exif_data.EXIF_TAGS, offset=offset)
-
- # yield it
- yield ifd
-
- # skip to next offset
- offset = ifd.next_offset
-
- def _load_subifd (self, tag, tag_dict) :
- """
- Creates and returns a sub-IFD for the given tag.
- """
-
- # locate it
- offset, = self.tag_values_raw(tag)
-
- # construct the new IFD
- return IFD(self, self.buf, tag_dict, offset=offset)
-
- def tag_data_info (self, tag) :
- """
- Calculate the location, format and size of the given tag's data.
-
- Returns a (fmt, offset, size) tuple.
- """
- # unknown tag?
- if not tag.type_data :
- return None
-
- # data format
- if len(tag.type_format) == 1 :
- # let struct handle the count
- fmt = "%d%s" % (tag.count, tag.type_format)
-
- else :
- # handle the count ourselves
- fmt = tag.type_format * tag.count
-
- # size of the data
- size = self.item_size(fmt)
-
- # inline or external?
- if size > 0x04 :
- # point at the external data
- offset = self.unpack_item('I', tag.data_raw)
-
- else :
- # point at the inline data
- offset = tag.offset + 0x08
-
- return fmt, offset, size
-
- def tag_values_raw (self, tag) :
- """
- Get the raw values for the given tag as a tuple.
-
- Returns None if the tag could not be recognized.
- """
-
- # find the data
- data_info = self.tag_data_info(tag)
-
- # not found?
- if not data_info :
- return None
-
- # unpack
- data_fmt, data_offset, data_size = data_info
-
- # read values
- return self.pread_struct(data_offset, data_fmt)
-
- def tag_values (self, tag) :
- """
- Gets the processed values for the given tag as a list.
- """
-
- # read + process
- return tag.process_values(self.tag_values_raw(tag))
-
- def tag_value (self, tag) :
- """
- Return the human-readable string value for the given tag.
- """
-
- # load the raw values
- values = self.tag_values(tag)
-
- # unknown?
- if not values :
- return ""
-
- # return as comma-separated formatted string, yes
- return tag.readable_value(values)
-
- def get_main_tags (self, **opts) :
- """
- Get the tags for the main image's IFD as a dict.
- """
-
- if not self.ifds :
- # weird case
- raise Exception("No IFD for main image found")
-
- # the main IFD is always the first one
- main_ifd = self.ifds[0]
-
- # do it
- return dict((tag.name, self.tag_value(tag)) for tag in main_ifd.get_tags(**opts))
-
-# mapping from two-byte TIFF byte order marker to struct prefix
-TIFF_BYTE_ORDER = {
- 'II': '<',
- 'MM': '>',
-}
-
-# "An arbitrary but carefully chosen number (42) that further identifies the file as a TIFF file"
-TIFF_BYTEORDER_MAGIC = 42
-
-def tiff_load (file, length=0, **opts) :
- """
- Load the Exif/TIFF data from the given file at its current position with optional length, using exif_load.
- """
-
- # all Exif data offsets are relative to the beginning of this TIFF header
- offset = file.tell()
-
- # mmap the region for the EXIF data
- buffer = mmap_buffer(file, length)
-
- # read byte-order header
- byte_order = file.read(2)
-
- # map to struct prefix
- struct_prefix = TIFF_BYTE_ORDER[byte_order]
-
- # validate
- check_value, = read_struct(file, struct_prefix + 'H')
-
- if check_value != TIFF_BYTEORDER_MAGIC :
- raise Exception("Invalid byte-order for TIFF: %2c -> %d" % (byte_order, check_value))
-
- # build and return the EXIF object with the correct offset/size from the mmap region
- return EXIF(buffer, offset=offset, size=length, **opts)
-
-# the JPEG markers that don't have any data
-JPEG_NOSIZE_MARKERS = (0xD8, 0xD9)
-
-# the first marker in a JPEG File
-JPEG_START_MARKER = 0xD8
-
-# the JPEG APP1 marker used for EXIF
-JPEG_EXIF_MARKER = 0xE1
-
-# the JPEG APP1 Exif header
-JPEG_EXIF_HEADER = "Exif\x00\x00"
-
-def jpeg_markers (file) :
- """
- Iterate over the JPEG markers in the given file, yielding (type_byte, size) tuples.
-
- The size fields will be 0 for markers with no data. The file will be positioned at the beginning of the data
- region, and may be seek'd around if needed.
-
- XXX: find a real implementation of this somewhere?
- """
-
- while True :
- # read type
- marker_byte, marker_type = read_struct(file, '!BB')
-
- # validate
- if marker_byte != 0xff :
- raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))
-
- # special cases for no data
- if marker_type in JPEG_NOSIZE_MARKERS :
- size = 0
-
- else :
- # read size field
- size, = read_struct(file, '!H')
-
- # validate
- if size < 0x02 :
- raise Exception("Invalid size for marker %x%x: %x" % (marker_byte, marker_type, size))
-
- else :
- # do not count the size field itself
- size = size - 2
-
- # ok, data is at current position
- offset = file.tell()
-
- # yield
- yield marker_type, size
-
- # absolute seek to next marker
- file.seek(offset + size)
-
-def jpeg_find_exif (file) :
- """
- Find the Exif/TIFF section in the given JPEG file.
-
- If found, the file will be seek'd to the start of the Exif/TIFF header, and the size of the Exif/TIFF data will
- be returned.
-
- Returns None if no EXIF section was found.
- """
-
- for count, (marker, size) in enumerate(jpeg_markers(file)) :
- # verify that it's a JPEG file
- if count == 0 :
- # must start with the right marker
- if marker != JPEG_START_MARKER :
- raise Exception("JPEG file must start with 0xFF%02x marker" % (marker, ))
-
- # look for APP1 marker (0xE1) with EXIF signature
- elif marker == JPEG_EXIF_MARKER and file.read(len(JPEG_EXIF_HEADER)) == JPEG_EXIF_HEADER:
- # skipped the initial Exif marker signature
- return size - len(JPEG_EXIF_HEADER)
-
- # nothing
- return None
-
-def jpeg_load (file, **opts) :
- """
- Loads the embedded Exif TIFF data from the given JPEG file using tiff_load.
-
- Returns None if no EXIF data could be found.
- """
-
- # look for the right section
- size = jpeg_find_exif(file)
-
- # not found?
- if not size :
- # nothing
- return
-
- else :
- # load it as TIFF data
- return tiff_load(file, size, **opts)
-
-def load_path (path, **opts) :
- """
- Loads an EXIF object from the given filesystem path.
-
- Returns None if it could not be parsed.
- """
-
- # file extension
- root, fext = os.path.splitext(path)
-
- # map
- func = {
- '.jpeg': jpeg_load,
- '.jpg': jpeg_load,
- '.tiff': tiff_load, # XXX: untested
- }.get(fext.lower())
-
- # not recognized?
- if not func :
- # XXX: sniff the file
- return None
-
- # open it
- file = open(path, 'rb')
-
- # try and load it
- return func(file, **opts)
-
-def dump_tag (exif, i, tag, indent=2) :
- """
- Dump the given tag
- """
-
- data_info = exif.tag_data_info(tag)
-
- if data_info :
- data_fmt, data_offset, data_size = data_info
-
- else :
- data_fmt = data_offset = data_size = None
-
- print "%sTag:%d offset=%#04x(%#08x), tag=%d/%s, type=%d/%s, count=%d, fmt=%s, offset=%#04x, size=%s, is_subifd=%s:" % (
- '\t'*indent,
- i,
- tag.offset, tag.offset + exif.offset,
- tag.tag, tag.name or '???',
- tag.type, tag.type_name if tag.type_data else '???',
- tag.count,
- data_fmt, data_offset, data_size,
- tag.is_subifd(),
- )
-
- if tag.is_subifd() :
- # recurse
- dump_ifd(exif, 0, tag.subifd, indent + 1)
-
- else :
- # dump each value
- values = exif.tag_values(tag)
-
- for i, value in enumerate(values) :
- print "%s\t%02d: %.120r" % ('\t'*indent, i, value)
-
- # and then the readable one
- print "%s\t-> %.120s" % ('\t'*indent, tag.readable_value(values), )
-
-
-def dump_ifd (exif, i, ifd, indent=1) :
- """
- Dump the given IFD, recursively
- """
-
- print "%sIFD:%d offset=%#04x(%#08x), count=%d, next=%d:" % (
- '\t'*indent,
- i,
- ifd.offset, ifd.offset + exif.offset,
- ifd.count,
- ifd.next_offset
- )
-
- for i, tag in enumerate(ifd.tags) :
- # dump
- dump_tag(exif, i, tag, indent + 1)
-
-
-def dump_exif (exif) :
- """
- Dump all tags from the given EXIF object to stdout
- """
-
- print "EXIF offset=%#08x, size=%d:" % (exif.offset, exif.size)
-
- for i, ifd in enumerate(exif.ifds) :
- # dump
- dump_ifd(exif, i, ifd)
-
-
-def list_tags (exif) :
- """
- Print a neat listing of tags to stdout
- """
-
- for k, v in exif.get_main_tags().iteritems() :
- print "%30s: %s" % (k, v)
-
-def main_path (path, dump) :
- # dump path
- print "%s: " % path
-
- # try and load it
- exif = load_path(path)
-
- if not exif :
- raise Exception("No EXIF data found")
-
- if dump :
- # dump everything
- dump_exif(exif)
-
- else :
- # list them
- list_tags(exif)
-
-
-def main (paths, dump=False) :
- """
- Load and dump EXIF data from the given path
- """
-
- # handle each one
- for path in paths :
- main_path(path, dump=dump)
-
-if __name__ == '__main__' :
- import getopt
- from sys import argv
-
- # defaults
- dump = False
-
- # parse args
- opts, args = getopt.getopt(argv[1:], "d", ["dump"])
-
- for opt, val in opts :
- if opt in ('-d', "--dump") :
- dump = True
-
- main(args, dump=dump)
-