author | Tero Marttila <terom@fixme.fi> |
Sat, 13 Jun 2009 20:59:53 +0300 | |
branch | new-exif |
changeset 105 | effae6f38749 |
parent 104 | 6afe59e5ffae |
child 106 | a4f605bd122c |
permissions | -rw-r--r-- |
102 | 1 |
""" |
2 |
A custom EXIF parsing module, aimed at high performance. |
|
3 |
""" |
|
4 |
||
5 |
import struct, mmap, os |
|
6 |
||
105
effae6f38749
refactor exif_data to use a Tag class for each tag, instead of a magic tuple
Tero Marttila <terom@fixme.fi>
parents:
104
diff
changeset
|
7 |
from utils import lazy_load, lazy_load_iter |
effae6f38749
refactor exif_data to use a Tag class for each tag, instead of a magic tuple
Tero Marttila <terom@fixme.fi>
parents:
104
diff
changeset
|
8 |
|
102 | 9 |
def read_struct(file, fmt):
    """
    Read one struct record described by `fmt` from `file` and unpack it.

    Exactly struct.calcsize(fmt) bytes are requested from the file's current
    position. The unpacked values are returned as a tuple; struct.unpack
    raises struct.error if the file returned fewer bytes than the format needs.
    """
    raw = file.read(struct.calcsize(fmt))

    return struct.unpack(fmt, raw)
|
22 |
||
23 |
class Buffer(object):
    """
    Wraps a buffer object (anything that supports the python buffer protocol) for read-only access.

    Includes an offset for relative values, and an endianness for reading binary data.

    Attributes set by the constructor:
        buf     - the `buffer` view created over the wrapped object
        offset  - the offset the view was created with (None if not given)
        size    - the size the view was created with (None if not given)
        prefix  - struct-module prefix prepended to every format string
    """

    def __init__(self, obj, offset=None, size=None, struct_prefix='='):
        """
        Create a new Buffer object with a new underlying buffer, created from the given object, offset and size.

        The endianness is given in the form of a struct-module prefix, which should be one of '<' or '>'.
        Standard size/alignment are assumed.
        """

        # buffer() takes (object, offset, size) positionally, so a size that is
        # given without an offset must not slide into the offset slot
        if size is not None:
            self.buf = buffer(obj, offset or 0, size)

        elif offset is not None:
            self.buf = buffer(obj, offset)

        else:
            self.buf = buffer(obj)

        # store
        self.offset = offset
        self.size = size
        self.prefix = struct_prefix

    def subregion(self, offset, length=None):
        """
        Create a new sub-Buffer referencing a view of this buffer, at the given offset, and with the given
        length, if any, and the same struct_prefix.
        """

        return Buffer(self.buf, offset, length, struct_prefix=self.prefix)

    def pread(self, offset, length):
        """
        Read a random-access region of raw data: `length` bytes starting at `offset`.
        """

        return self.buf[offset:offset + length]

    def pread_struct(self, offset, fmt):
        """
        Read structured data using the given struct format from the given offset.

        `fmt` is given without a byte-order prefix; this buffer's prefix is prepended.
        Returns the tuple produced by struct.unpack_from.
        """

        return struct.unpack_from(self.prefix + fmt, self.buf, offset=offset)

    def pread_item(self, offset, fmt):
        """
        Read a single item of structured data from the given offset.

        `fmt` must describe exactly one value, otherwise the unpacking raises ValueError.
        """

        value, = self.pread_struct(offset, fmt)

        return value

    def iter_offsets(self, count, size, offset=0):
        """
        Yield a series of offsets for `count` items of `size` bytes, beginning at `offset`.
        """

        return xrange(offset, offset + count * size, size)

    def item_size(self, fmt):
        """
        Returns the size in bytes of the given item format, using this buffer's struct prefix.
        """

        return struct.calcsize(self.prefix + fmt)

    def unpack_item(self, fmt, data):
        """
        Unpacks a single item from the given data, using this buffer's struct prefix.
        """

        value, = struct.unpack(self.prefix + fmt, data)

        return value
|
97 |
||
98 |
def mmap_buffer(file, size):
    """
    Memory-map the first `size` bytes of `file` read-only and return the mmap object.

    The mapping is created from the file's descriptor (file.fileno()).
    """
    descriptor = file.fileno()

    return mmap.mmap(descriptor, size, access=mmap.ACCESS_READ)
|
104 |
||
105 |
import exif_data |
|
106 |
||
107 |
class Tag(object):
    """
    One entry (tag) read from an IFD.
    """

    def __init__(self, ifd, offset, tag, type, count, data_raw):
        """
        Keep the binary fields of the IFD entry and resolve its type and tag metadata.

        `ifd` is the owning IFD (its `tag_dict` is used for the tag lookup), `offset` is where
        the entry was read from, and `tag`, `type`, `count`, `data_raw` are the entry's fields.
        """

        self.ifd = ifd
        self.offset = offset
        self.tag = tag
        self.type = type
        self.count = count
        self.data_raw = data_raw

        # field-type description from exif_data; None when the type code is unknown
        self.type_data = exif_data.FIELD_TYPES.get(type)

        # NOTE: the type_* attributes only exist when the type code was recognised
        if self.type_data:
            (self.type_format, self.type_name, self.type_func) = self.type_data

        # tag description from the owning IFD's dictionary; None when the tag is unknown
        self.tag_data = self.ifd.tag_dict.get(tag)

    @property
    def name(self):
        """
        The name of this tag as found via its code, or None if the tag is unknown.
        """

        if not self.tag_data:
            return None

        return self.tag_data.name

    def process_values(self, raw_values):
        """
        Run the raw unpacked values through the type's filter function, if there is one.

        Without a known type or a filter function the values are returned untouched.
        """

        if not (self.type_data and self.type_func):
            # nothing to apply
            return raw_values

        return self.type_func(raw_values)

    def readable_value(self, values):
        """
        Turn the given values for this tag into a human-readable string.

        Known tags delegate to their tag data's map_values(); unknown tags fall back to
        the values joined with ", ".
        """

        if self.tag_data:
            return self.tag_data.map_values(values)

        return ", ".join([str(item) for item in values])
102 | 173 |
|
174 |
# size of an IFD entry in bytes; this matches the 'HHI4s' layout that IFD.tags
# unpacks per entry: tag (2) + type (2) + count (4) + inline data/offset (4)
IFD_ENTRY_SIZE = 12
|
176 |
||
177 |
class IFD(Buffer):
    """
    An IFD (Image File Directory) region inside EXIF data.
    """

    def __init__(self, buffer, tag_dict, **buffer_opts):
        """
        Wrap the IFD found in the given bufferable object, using the given buffer options.

        `tag_dict` is the dictionary that Tag objects of this IFD use to look up their tag data.
        The entry `count` and the `next_offset` value are read immediately.
        """

        super(IFD, self).__init__(buffer, **buffer_opts)

        self.tag_dict = tag_dict

        # the IFD starts with a 16-bit entry count
        self.count = self.pread_item(0, 'H')

        # the 32-bit next-IFD offset sits right behind the entry table
        table_end = 0x02 + self.count * IFD_ENTRY_SIZE
        self.next_offset = self.pread_item(table_end, 'I')

    @lazy_load_iter
    def tags(self):
        """
        Iterate over a Tag object for every entry in this IFD.

        Each Tag carries the offset of its entry relative to the start of this IFD.
        """

        for entry_offset in self.iter_offsets(self.count, IFD_ENTRY_SIZE, 0x02):
            # (tag, type, count, data_raw) as stored in the entry
            fields = self.pread_struct(entry_offset, 'HHI4s')

            yield Tag(self, entry_offset, *fields)
102 | 214 |
|
215 |
class EXIF(Buffer):
    """
    Represents the EXIF data embedded in some image file in the form of a Region.

    All offsets handled by this class are relative to the start of this buffer
    (the TIFF header), unless noted otherwise.
    """

    def __init__(self, buffer, tags=None, **buffer_opts):
        """
        Access the EXIF data from the given bufferable object with the given buffer options.

        `tags`, if given, specifies that only the given named tags should be loaded.
        NOTE: the `tags` argument is accepted but not used anywhere in this class yet.
        """

        # init Buffer
        super(EXIF, self).__init__(buffer, **buffer_opts)

        # store
        self.buffer = buffer

    @lazy_load_iter
    def ifds(self):
        """
        Iterate over the primary IFDs in this EXIF.

        The chain starts at the offset stored at 0x04 and follows each IFD's next_offset
        until it is zero.
        """

        # starting offset
        offset = self.pread_item(0x04, 'I')

        # offsets already visited, so that a corrupt file with a cyclic chain terminates
        seen = set()

        while offset and offset not in seen:
            seen.add(offset)

            # create and read the IFD, operating on the right sub-buffer; the IFD must use
            # the same byte order as this buffer, otherwise it falls back to native order
            ifd = IFD(self.buf, exif_data.EXIF_TAGS, offset=offset, struct_prefix=self.prefix)

            # yield it
            yield ifd

            # skip to next offset
            offset = ifd.next_offset

    def iter_all_ifds(self):
        """
        Iterate over all of the IFDs contained within this EXIF, or within other IFDs.

        NOTE: not implemented yet; the body is empty, so calling this returns None.
        """

    def tag_data_info(self, tag):
        """
        Calculate the location, format and size of the given tag's data.

        Returns a (fmt, offset, size) tuple, where `fmt` is a struct format without byte-order
        prefix and `offset` is relative to this EXIF buffer. Returns None if the tag's field
        type is unknown.
        """

        # unknown tag?
        if not tag.type_data:
            return None

        # data format
        if len(tag.type_format) == 1:
            # let struct handle the count
            fmt = "%d%s" % (tag.count, tag.type_format)

        else:
            # handle the count ourselves
            fmt = tag.type_format * tag.count

        # size of the data
        size = self.item_size(fmt)

        # inline or external?
        if size > 0x04:
            # point at the external data
            offset = self.unpack_item('I', tag.data_raw)

        else:
            # point at the inline data: the last four bytes of the entry itself.
            # tag.offset is relative to the tag's IFD, so the IFD's own offset has to be
            # added to get a position that is valid in this EXIF buffer
            offset = (tag.ifd.offset or 0) + tag.offset + 0x08

        return fmt, offset, size

    def tag_values_raw(self, tag):
        """
        Get the raw values for the given tag as a tuple.

        Returns None if the tag could not be recognized.
        """

        # find the data
        data_info = self.tag_data_info(tag)

        # not found?
        if not data_info:
            return None

        # unpack
        data_fmt, data_offset, data_size = data_info

        # read values
        return self.pread_struct(data_offset, data_fmt)

    def tag_values(self, tag):
        """
        Gets the processed values for the given tag.

        This is whatever tag.process_values() returns for the raw values; None is passed
        through when the raw values could not be read.
        """

        # read + process
        return tag.process_values(self.tag_values_raw(tag))

    def tag_value(self, tag):
        """
        Return the human-readable string value for the given tag.

        Returns an empty string if the tag has no (readable) values.
        """

        # load the processed values
        values = self.tag_values(tag)

        # unknown?
        if not values:
            return ""

        # return as comma-separated formatted string, yes
        return tag.readable_value(values)
102 | 332 |
|
333 |
# mapping from two-byte TIFF byte order marker to struct prefix
# ('<' is the struct module's little-endian prefix, '>' its big-endian prefix)
TIFF_BYTE_ORDER = {
    'II': '<',
    'MM': '>',
}

# "An arbitrary but carefully chosen number (42) that further identifies the file as a TIFF file"
# tiff_load reads this 16-bit value right after the byte order marker and compares it
TIFF_BYTEORDER_MAGIC = 42
|
341 |
||
342 |
def tiff_load(file, length=0, **opts):
    """
    Load the Exif/TIFF data from the given file at its current position with optional length.

    `length` is the size of the TIFF data in bytes; 0 (the default) means everything from the
    current position up to the end of the file. Any extra keyword arguments are passed on to
    the EXIF constructor.

    Returns the new EXIF object. Raises KeyError if the byte order marker is not known, and
    Exception if the TIFF magic number does not match.
    """

    # all Exif data offsets are relative to the beginning of this TIFF header
    offset = file.tell()

    # read byte-order header
    byte_order = file.read(2)

    # map to struct prefix
    struct_prefix = TIFF_BYTE_ORDER[byte_order]

    # validate (before mapping anything, so a bad header leaves no mmap behind)
    check_value, = read_struct(file, struct_prefix + 'H')

    if check_value != TIFF_BYTEORDER_MAGIC:
        # %r, as '%2c' cannot format the two-character marker
        raise Exception("Invalid byte-order for TIFF: %r -> %d" % (byte_order, check_value))

    file_size = os.fstat(file.fileno()).st_size

    if not length:
        # no explicit length: use the rest of the file
        length = file_size - offset

    # the mapping starts at the beginning of the file, so it has to span the header offset
    # as well as the data itself; never ask for more than the file holds
    buffer = mmap_buffer(file, min(offset + length, file_size))

    # hand the detected byte order to the EXIF object, unless the caller forces one
    opts.setdefault('struct_prefix', struct_prefix)

    # build and return the EXIF object with the correct offset/size from the mmap region
    return EXIF(buffer, offset=offset, size=length, **opts)
|
367 |
||
368 |
# the JPEG markers that don't have any data
JPEG_NOSIZE_MARKERS = (0xD8, 0xD9)

# the first marker in a JPEG File
JPEG_START_MARKER = 0xD8

# the last marker in a JPEG file (end of image)
JPEG_END_MARKER = 0xD9

# the start-of-scan marker; entropy-coded image data follows its header
JPEG_SCAN_MARKER = 0xDA

# the JPEG APP1 marker used for EXIF
JPEG_EXIF_MARKER = 0xE1

# the JPEG APP1 Exif header
JPEG_EXIF_HEADER = "Exif\x00\x00"

def jpeg_markers(file):
    """
    Iterate over the JPEG markers in the given file, yielding (type_byte, size) tuples.

    The size fields will be 0 for markers with no data. The file will be positioned at the beginning of the data
    region, and may be seek'd around if needed.

    Iteration ends at the end of the file, after the end-of-image marker, or after the
    start-of-scan marker (what follows that one is image data, not marker segments).

    Raises Exception for bytes that are not a marker or for an invalid size field, and
    struct.error if the file ends in the middle of a marker or size field.

    XXX: find a real implementation of this somewhere?
    """

    while True:
        # read type
        header = file.read(2)

        if not header:
            # clean end of file, no more markers
            return

        marker_byte, marker_type = struct.unpack('!BB', header)

        # validate
        if marker_byte != 0xff:
            raise Exception("Not a JPEG marker: %x%x" % (marker_byte, marker_type))

        # special cases for no data
        if marker_type in JPEG_NOSIZE_MARKERS:
            size = 0

        else:
            # read size field
            size, = struct.unpack('!H', file.read(2))

            # validate
            if size < 0x02:
                raise Exception("Invalid size for marker %x%x: %x" % (marker_byte, marker_type, size))

            # do not count the size field itself
            size = size - 2

        # ok, data is at current position
        offset = file.tell()

        # yield
        yield marker_type, size

        # nothing parseable follows these two
        if marker_type in (JPEG_END_MARKER, JPEG_SCAN_MARKER):
            return

        # absolute seek to next marker, as the consumer may have moved the file position
        file.seek(offset + size)
|
422 |
||
423 |
def jpeg_find_exif(file):
    """
    Find the Exif/TIFF section in the given JPEG file.

    If found, the file will be seek'd to the start of the Exif/TIFF header, and the size of the Exif/TIFF data will
    be returned.

    Returns None if no EXIF section was found. Raises Exception if the first marker is not
    the JPEG start marker; errors from jpeg_markers() are passed through.
    """

    header_size = len(JPEG_EXIF_HEADER)

    for index, (marker, size) in enumerate(jpeg_markers(file)):
        if index == 0:
            # verify that it's a JPEG file: it must start with the right marker
            if marker != JPEG_START_MARKER:
                # report the marker that was required as well as the one that was found
                raise Exception("JPEG file must start with 0xFF%02x marker, found 0xFF%02x" % (
                    JPEG_START_MARKER, marker
                ))

            continue

        # look for APP1 marker (0xE1)...
        if marker != JPEG_EXIF_MARKER:
            continue

        # ...with EXIF signature; reading it also skips past it
        if file.read(header_size) == JPEG_EXIF_HEADER:
            return size - header_size

    # nothing
    return None
|
447 |
||
448 |
def jpeg_load(file, **opts):
    """
    Load the Exif TIFF data embedded in the given JPEG file via tiff_load.

    Extra keyword arguments are handed to tiff_load. Returns None when jpeg_find_exif
    reports no (or an empty) EXIF section.
    """

    exif_size = jpeg_find_exif(file)

    if exif_size:
        # the file is now positioned at the TIFF header
        return tiff_load(file, exif_size, **opts)

    return None
|
466 |
||
467 |
def load_path(path, **opts):
    """
    Loads an EXIF object from the given filesystem path.

    The loader is picked by file extension (.jpeg/.jpg/.tiff, case-insensitive) and is called
    with the opened file and the given keyword arguments.

    Returns None if the extension is not recognized, or if the loader found no EXIF data.
    Errors raised by open() or by the loader are passed through.
    """

    # file extension
    fext = os.path.splitext(path)[1]

    # map
    func = {
        '.jpeg':    jpeg_load,
        '.jpg':     jpeg_load,
        '.tiff':    tiff_load,  # XXX: untested
    }.get(fext.lower())

    # not recognized?
    if not func:
        # XXX: sniff the file
        return None

    # open it
    file = open(path, 'rb')

    try:
        # try and load it
        return func(file, **opts)

    finally:
        # do not leak the file object; the loaders work on an mmap of the file, and a
        # mapping stays valid after the descriptor it was created from is closed
        file.close()
|
494 |
||
495 |
def dump_exif(exif):
    """
    Dump all tags from the given EXIF object to stdout.

    Prints one line for the EXIF region, one per IFD and one per tag, followed by the tag's
    processed values and their readable form. Offsets are shown relative to their container
    first, and as position in the file in parentheses. Tags with an unknown field type are
    listed with fmt/offset/size of None and no values.
    """

    print("EXIF offset=%#08x, size=%d:" % (exif.offset, exif.size))

    for ifd_index, ifd in enumerate(exif.ifds):
        print("\tIFD:%d offset=%#04x(%#08x), count=%d, next=%d:" % (
            ifd_index,
            ifd.offset, ifd.offset + exif.offset,
            ifd.count,
            ifd.next_offset
        ))

        for tag_index, tag in enumerate(ifd.tags):
            data_info = exif.tag_data_info(tag)

            if data_info:
                data_fmt, data_offset, data_size = data_info

                # pre-format, as '%#04x' cannot take the None of the unknown case
                data_offset_text = "%#04x" % data_offset

            else:
                data_fmt = data_size = None
                data_offset_text = None

            print("\t\tTag:%d offset=%#04x(%#08x), tag=%d/%s, type=%d/%s, count=%d, fmt=%s, offset=%s, size=%s:" % (
                tag_index,
                # tag.offset is relative to its IFD, so the file position includes the IFD's offset
                tag.offset, tag.offset + ifd.offset + exif.offset,
                tag.tag, tag.name or '???',
                tag.type, tag.type_name if tag.type_data else '???',
                tag.count,
                data_fmt, data_offset_text, data_size,
            ))

            values = exif.tag_values(tag)

            if values is None:
                # unknown field type: nothing could be read
                values = ()

            for value_index, value in enumerate(values):
                print("\t\t\t%02d: %r" % (value_index, value))

            print("\t\t\t-> %s" % (tag.readable_value(values), ))
534 |
||
535 |
def main(path, quiet=False):
    """
    Load the EXIF data from the given path and, unless `quiet`, dump it to stdout.

    Raises Exception("No EXIF data found") if load_path returns nothing.
    """

    exif = load_path(path)

    if not exif:
        raise Exception("No EXIF data found")

    if quiet:
        # load only, print nothing
        return

    print("%s: " % path)

    dump_exif(exif)
102 | 552 |
|
553 |
if __name__ == '__main__':
    # usage: <script> <path> [-q]; '-q' anywhere on the command line suppresses the dump
    import sys

    main(sys.argv[1], '-q' in sys.argv)
102 | 557 |