|
1 """ |
|
2 A custom EXIF parsing module, aimed at high performance. |
|
3 """ |
|
4 |
|
5 import struct, mmap, os |
|
6 |
|
def read_struct(file, fmt):
    """
        Read and decode one struct-formatted record from a file-like object.

        Exactly `struct.calcsize(fmt)` bytes are requested from `file.read`, and
        the result is decoded with `struct.unpack`.

        Returns the tuple of unpacked values (a one-element tuple for a
        single-item format).

        Raises struct.error if the file yields fewer bytes than `fmt` requires.
    """

    # ask the file for exactly as many bytes as the format describes
    raw = file.read(struct.calcsize(fmt))

    # a short read is not checked here, struct.unpack rejects it for us
    return struct.unpack(fmt, raw)
|
20 |
|
class Buffer(object):
    """
        Wraps a buffer object (anything that supports the python buffer protocol) for read-only access.

        Includes an offset for relative values, and an endianness for reading binary data.

        Attributes:
            buf     - the underlying read-only buffer() view
            offset  - offset of the view within the wrapped object
            size    - size of the view in bytes
            prefix  - struct-module byte order prefix applied to every format
    """

    def __init__(self, obj, offset=None, size=None, struct_prefix='='):
        """
            Create a new Buffer object with a new underlying buffer, created from the given object, offset and size.

            A missing `offset` means the start of `obj`, a missing `size` means everything up to the end of `obj`.

            The endianness is given in the form of a struct-module prefix, which should be one of '<' or '>'.
            Standard size/alignment are assumed.
        """

        # buffer() only accepts integers, so None must not be passed through
        if offset is None:
            offset = 0

        if size is None:
            # no explicit size: view everything from offset to the end
            self.buf = buffer(obj, offset)
            size = len(self.buf)

        else:
            self.buf = buffer(obj, offset, size)

        # store real integers, so that they can be used in arithmetic and formatting
        self.offset = offset
        self.size = size
        self.prefix = struct_prefix

    def subregion(self, offset, length=None):
        """
            Create a new sub-Buffer referencing a view of this buffer, at the given offset, and with the given
            length, if any, and the same struct_prefix.
        """

        return Buffer(self.buf, offset, length, struct_prefix=self.prefix)

    def pread(self, offset, length):
        """
            Read a random-access region of raw data: `length` bytes starting at `offset`.
        """

        return self.buf[offset:offset + length]

    def pread_struct(self, offset, fmt):
        """
            Read structured data using the given struct format from the given offset.

            The byte order prefix of this Buffer is prepended to `fmt`. Returns the tuple of unpacked values.
        """

        return struct.unpack_from(self.prefix + fmt, self.buf, offset)

    def pread_item(self, offset, fmt):
        """
            Read a single item of structured data from the given offset.

            `fmt` must describe exactly one value, otherwise the unpacking below raises ValueError.
        """

        value, = self.pread_struct(offset, fmt)

        return value

    def iter_offsets(self, count, size, offset=0):
        """
            Yield a series of offsets for `count` items of `size` bytes, beginning at `offset`.
        """

        return xrange(offset, offset + count * size, size)

    def item_size(self, fmt):
        """
            Returns the size in bytes of the given item format, using this Buffer's byte order prefix.
        """

        return struct.calcsize(self.prefix + fmt)

    def unpack_item(self, fmt, data):
        """
            Unpacks a single item from the given data, using this Buffer's byte order prefix.
        """

        value, = struct.unpack(self.prefix + fmt, data)

        return value
|
95 |
|
def mmap_buffer(file, size):
    """
        Map `size` bytes of the given open file into memory, read-only.

        `file` must provide fileno(). Returns the new mmap object.
    """

    descriptor = file.fileno()

    return mmap.mmap(descriptor, size, access=mmap.ACCESS_READ)
|
102 |
|
103 import exif_data |
|
104 |
|
class Tag(object):
    """
        Represents a single Tag in an IFD.

        Besides the raw fields of the IFD entry, the type and tag codes are looked up in
        exif_data.FIELD_TYPES and exif_data.EXIF_TAGS. The derived attributes (type_format, type_name,
        tag_name, tag_value_spec) are None whenever the corresponding lookup yields nothing.
    """

    def __init__(self, offset, tag, type, count, value_ref):
        """
            Build a Tag with the given binary items from the IFD entry.

                offset      - offset of the IFD entry, as given by the caller
                tag         - numeric tag code
                type        - numeric field type code
                count       - number of values
                value_ref   - the entry's four-byte value/offset field, unpacked as an integer
        """

        self.offset = offset
        self.tag = tag
        self.type = type
        self.count = count
        self.value_ref = value_ref

        # make sure every derived attribute exists, even for unknown types/tags
        self.type_format = None
        self.type_name = None
        self.tag_name = None
        self.tag_value_spec = None

        # lookup the type for this tag
        self.type_data = exif_data.FIELD_TYPES.get(type)

        if self.type_data:
            # a (struct format, type name) pair
            self.type_format, self.type_name = self.type_data

        # lookup the tag data for this tag
        self.tag_data = exif_data.EXIF_TAGS.get(tag)

        if self.tag_data:
            # the EXIF tag name
            self.tag_name = self.tag_data[0]

            # the optional value formatting specification
            if len(self.tag_data) > 1:
                self.tag_value_spec = self.tag_data[1]

    @property
    def name(self):
        """
            Lookup the name of this tag via its code, returns None if unknown.
        """

        if self.tag_data:
            return self.tag_name

        return None

    def readable_value(self, value):
        """
            Convert the given value for this tag into a human-readable string.

            Values are mapped using exif_data.tag_value when the tag has a value specification;
            otherwise the value itself is returned unchanged.
        """

        if self.tag_data and self.tag_value_spec:
            # map it
            return exif_data.tag_value(self.tag_value_spec, value)

        # nothing to map with
        return value
|
169 |
|
# number of bytes taken up by one IFD entry
IFD_ENTRY_SIZE = 12


class IFD(Buffer):
    """
        An IFD (Image File Directory) region within EXIF data.

        Layout as read here: a two-byte entry count, `count` entries of IFD_ENTRY_SIZE bytes each, and
        finally a four-byte offset to the next IFD.
    """

    def __init__(self, buffer, **buffer_opts):
        """
            Wrap the given bufferable object (using the given Buffer options) and read the IFD's
            `count` and `next_offset` fields from it.
        """

        super(IFD, self).__init__(buffer, **buffer_opts)

        # the directory starts with the number of entries
        entry_count = self.pread_item(0, 'H')
        self.count = entry_count

        # the next-IFD offset follows directly after the last entry
        self.next_offset = self.pread_item(0x02 + entry_count * IFD_ENTRY_SIZE, 'I')

    def iter_tags(self):
        """
            Yield a Tag object for every entry in this IFD, in order.
        """

        # entries begin right after the two-byte count
        entry_offsets = self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02)

        for entry_offset in entry_offsets:
            # tag code, field type, value count, value/offset field
            fields = self.pread_struct(entry_offset, 'HHII')

            yield Tag(entry_offset, *fields)
|
206 |
|
class EXIF(Buffer):
    """
        Represents the EXIF data embedded in some image file in the form of a Buffer region.

        All offsets used by the methods here are relative to the start of this region.
    """

    def __init__(self, buffer, tags=None, **buffer_opts):
        """
            Access the EXIF data from the given bufferable object with the given buffer options.

            `tags`, if given, specifies that only the given named tags should be loaded.
            NOTE: `tags` is accepted, but nothing in this class uses it so far.
        """

        # init Buffer
        super(EXIF, self).__init__(buffer, **buffer_opts)

        # store
        self.buffer = buffer

    def iter_ifds(self):
        """
            Iterate over all of the IFD objects in this EXIF.

            Iteration follows the chain of next-IFD offsets, and stops at a zero offset, or when an
            offset repeats (which would otherwise loop forever on corrupt data).
        """

        # offset of the first IFD is stored after the byte-order mark and magic number
        offset = self.pread_item(0x04, 'I')

        total_size = len(self.buf)
        seen_offsets = set()

        while offset and offset not in seen_offsets:
            seen_offsets.add(offset)

            # view our own buffer from the IFD's offset onwards, keeping our byte order
            ifd = IFD(self.buf, offset=offset, size=total_size - offset, struct_prefix=self.prefix)

            yield ifd

            # skip to next offset
            offset = ifd.next_offset

    __iter__ = iter_ifds

    def tag_values(self, tag):
        """
            Get the raw values for the given tag as a tuple.

            Returns None if the tag's field type could not be recognized.
        """

        # unknown type?
        if not tag.type_data:
            return None

        value_format = "%d%s" % (tag.count, tag.type_format)

        # size of the data
        data_size = tag.count * self.item_size(tag.type_format)

        if data_size > 0x04:
            # external: the entry's value field is the offset of the data within the EXIF region
            return self.pread_struct(tag.value_ref, value_format)

        # inline: the data is stored in the entry's value field itself, so restore the original
        # four bytes of the field and decode the leading data_size bytes of those
        inline_data = struct.pack(self.prefix + 'I', tag.value_ref)

        return struct.unpack(self.prefix + value_format, inline_data[:data_size])

    def tag_value(self, tag):
        """
            Return the human-readable string value for the given tag.

            Multiple values are joined with ", ". Returns an empty string if there are no values.
        """

        # load the raw values
        values = self.tag_values(tag)

        # unknown or empty?
        if not values:
            return ""

        # readable_value may hand back the raw (non-string) value, so convert before joining
        return ", ".join(str(tag.readable_value(value)) for value in values)
|
285 |
|
# mapping from two-byte TIFF byte order marker to struct prefix
TIFF_BYTE_ORDER = {
    b'II': '<',
    b'MM': '>',
}

# "An arbitrary but carefully chosen number (42) that further identifies the file as a TIFF file"
TIFF_BYTEORDER_MAGIC = 42


def tiff_load(file, length=0, **opts):
    """
        Load the Exif/TIFF data from the given file at its current position with optional length.

        The file must be positioned at the start of the TIFF header. A `length` of 0 means that the
        data extends to the end of the file. Any further keyword arguments are passed on to EXIF.

        Returns the new EXIF object.

        Raises KeyError for an unknown byte-order marker, and Exception if the magic number following
        it does not match.
    """

    # all Exif data offsets are relative to the beginning of this TIFF header
    offset = file.tell()

    # read byte-order header
    byte_order = file.read(2)

    # map to struct prefix
    struct_prefix = TIFF_BYTE_ORDER[byte_order]

    # validate
    check_value, = read_struct(file, struct_prefix + 'H')

    if check_value != TIFF_BYTEORDER_MAGIC:
        raise Exception("Invalid byte-order for TIFF: %r -> %d" % (byte_order, check_value))

    # the mapping always starts at the beginning of the file, so it has to extend up to the end of the
    # TIFF data; a size of zero maps the whole file
    region = mmap_buffer(file, offset + length if length else 0)

    if not length:
        # everything from the TIFF header to the end of the file
        length = len(region) - offset

    # values must be decoded using the byte order declared by the header, unless overridden
    opts.setdefault('struct_prefix', struct_prefix)

    # build and return the EXIF object with the correct offset/size from the mmap region
    return EXIF(region, offset=offset, size=length, **opts)
|
320 |
|
# the JPEG markers that don't have any data
JPEG_NOSIZE_MARKERS = (0xD8, 0xD9)

# the first marker in a JPEG File
JPEG_START_MARKER = 0xD8

# the last marker in a JPEG file
JPEG_END_MARKER = 0xD9

# the start-of-scan marker; it is followed by compressed image data instead of further markers
JPEG_SCAN_MARKER = 0xDA

# the JPEG APP1 marker used for EXIF
JPEG_EXIF_MARKER = 0xE1

# the JPEG APP1 Exif header
JPEG_EXIF_HEADER = b"Exif\x00\x00"


def jpeg_markers(file):
    """
        Iterate over the JPEG markers in the given file, yielding (type_byte, size) tuples.

        The size fields will be 0 for markers with no data. The file will be positioned at the beginning of the data
        region, and may be seek'd around if needed.

        Iteration ends at the end of the file, after the end-of-image marker, or after the start-of-scan marker.

        Raises Exception on an invalid marker or size field, and struct.error if the file ends in the middle of a
        marker header.

        XXX: find a real implementation of this somewhere?
    """

    while True:
        # read marker header
        header = file.read(2)

        if not header:
            # clean end of file
            return

        marker_byte, marker_type = struct.unpack('!BB', header)

        # validate
        if marker_byte != 0xff:
            raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))

        # special cases for no data; this is decided by the marker's type, not by the 0xFF prefix
        if marker_type in JPEG_NOSIZE_MARKERS:
            size = 0

        else:
            # read size field
            size, = struct.unpack('!H', file.read(2))

            # validate
            if size < 0x02:
                raise Exception("Invalid size for marker %x%x: %x" % (marker_byte, marker_type, size))

            # do not count the size field itself
            size -= 2

        # ok, data is at current position; remember it, as the consumer may move the file around
        offset = file.tell()

        yield marker_type, size

        # no further markers can be parsed after these
        if marker_type in (JPEG_END_MARKER, JPEG_SCAN_MARKER):
            return

        # absolute seek to next marker
        file.seek(offset + size)
|
375 |
|
def jpeg_find_exif(file):
    """
        Find the Exif/TIFF section in the given JPEG file.

        If found, the file will be seek'd to the start of the Exif/TIFF header, and the size of the Exif/TIFF data will
        be returned.

        Returns None if no EXIF section was found.

        Raises Exception if the file does not start with the JPEG start marker.
    """

    header_size = len(JPEG_EXIF_HEADER)

    for count, (marker, size) in enumerate(jpeg_markers(file)):
        # verify that it's a JPEG file
        if count == 0:
            # must start with the right marker
            if marker != JPEG_START_MARKER:
                raise Exception("JPEG file must start with 0xFF%02x marker, not 0xFF%02x" % (JPEG_START_MARKER, marker))

        # look for APP1 marker (0xE1) with EXIF signature
        elif marker == JPEG_EXIF_MARKER and size >= header_size and file.read(header_size) == JPEG_EXIF_HEADER:
            # the signature was just read, so the file is now at the TIFF header and what is left of the
            # section is the Exif/TIFF data
            return size - header_size

    # nothing
    return None
|
400 |
|
def jpeg_load(file, **opts):
    """
        Loads the embedded Exif TIFF data from the given JPEG file using tiff_load.

        Any keyword arguments are passed on to tiff_load.

        Returns None if no EXIF data could be found.
    """

    # look for the right section; on success this leaves the file positioned at the TIFF header
    size = jpeg_find_exif(file)

    # not found, or found but empty?
    if not size:
        return None

    # load it as TIFF data
    return tiff_load(file, size, **opts)
|
419 |
|
def load_path(path, **opts):
    """
        Loads an EXIF object from the given filesystem path.

        The loader is chosen by the (case-insensitive) file extension: .jpeg/.jpg use jpeg_load, .tiff uses
        tiff_load. Any keyword arguments are passed on to the loader.

        Returns None if the file extension is not recognized, or if the loader finds nothing.
    """

    # file extension
    root, fext = os.path.splitext(path)
    fext = fext.lower()

    # map
    if fext in ('.jpeg', '.jpg'):
        func = jpeg_load

    elif fext == '.tiff':
        # XXX: untested
        func = tiff_load

    else:
        # not recognized
        # XXX: sniff the file
        return None

    # the file only needs to stay open while loading; the mmap module keeps its own duplicate of the
    # file descriptor for the mapped region
    with open(path, 'rb') as file:
        # try and load it
        return func(file, **opts)
|
447 |
|
def dump_exif(exif):
    """
        Dump all tags from the given EXIF object to stdout.

        Prints one line for the EXIF region, one line per IFD, one line per tag in that IFD, and one line
        per value of that tag. Tags with an unrecognized field type are printed without values.
    """

    print("EXIF offset=%d, size=%d:" % (exif.offset, exif.size))

    for ifd_index, ifd in enumerate(exif.iter_ifds()):
        print("\tIFD %d, offset=%d, size=%d, count=%d, next=%d:" % (ifd_index, ifd.offset, ifd.size, ifd.count, ifd.next_offset))

        # the tags live in the IFD, not in the EXIF object
        for tag_index, tag in enumerate(ifd.iter_tags()):
            print("\t\tTag %d, offset=%d, tag=%d/%s, type=%d/%s, count=%d:" % (
                tag_index,
                tag.offset,
                tag.tag, tag.name or '???',
                tag.type, tag.type_name if tag.type_data else '???',
                tag.count,
            ))

            # tag_values returns None for unknown field types
            for value_index, value in enumerate(exif.tag_values(tag) or ()):
                print("\t\t\t%02d: %s" % (value_index, tag.readable_value(value)))
|
469 |
|
def main(path):
    """
        Command-line entry point: load the EXIF data from the file at `path` and dump it to stdout.

        Raises Exception if load_path does not return any EXIF data.
    """

    exif = load_path(path)

    if not exif:
        raise Exception("No EXIF data found")

    # heading, followed by an empty line
    print("%s: " % path)
    print("")

    dump_exif(exif)


if __name__ == '__main__':
    from sys import argv

    # the image path is the first command-line argument
    main(argv[1])
|
491 |