]>
Commit | Line | Data |
---|---|---|
54256267 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import collections | |
4 | import io | |
5 | import struct | |
6 | import zlib | |
7 | ||
70f767dc PH |
8 | from .utils import ( |
9 | compat_str, | |
10 | ExtractorError, | |
11 | ) | |
54256267 PH |
12 | |
13 | ||
0cb20563 PH |
def _extract_tags(file_contents):
    """Yield a ``(tag_code, tag_data)`` pair for every tag of an SWF file.

    file_contents is the whole file as bytes.  Bytes 1-2 of the signature
    must be ``WS``; the first byte selects how the body (everything after
    the first 8 bytes) is stored: ``C`` means zlib-compressed, ``F`` means
    stored as-is.

    Raises ExtractorError when the signature is not ``?WS`` and
    NotImplementedError for any other storage marker.  Because this is a
    generator, those errors surface when iteration starts.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            'Not an SWF file; header is %r' % file_contents[:3])
    compression = file_contents[:1]
    if compression == b'C':
        content = zlib.decompress(file_contents[8:])
    elif compression == b'F':
        # Uncompressed file: the body follows the 8-byte header verbatim
        content = file_contents[8:]
    else:
        raise NotImplementedError(
            'Unsupported compression format %r' %
            file_contents[:1])

    # Determine number of bits in framesize rectangle
    # (top 5 bits of the first byte; the rectangle stores 4 such fields)
    framesize_nbits = struct.unpack('!B', content[:1])[0] >> 3
    framesize_len = (5 + 4 * framesize_nbits + 7) // 8

    # Skip the rectangle and two 2-byte fields
    # (presumably frame rate and frame count -- TODO confirm against spec)
    pos = framesize_len + 2 + 2
    while pos < len(content):
        # Short tag header: upper 10 bits are the code, lower 6 the length
        header16 = struct.unpack('<H', content[pos:pos + 2])[0]
        pos += 2
        tag_code = header16 >> 6
        tag_len = header16 & 0x3f
        if tag_len == 0x3f:
            # Length 0x3f is an escape: the real length follows as uint32
            tag_len = struct.unpack('<I', content[pos:pos + 4])[0]
            pos += 4
        assert pos + tag_len <= len(content), \
            ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
                % (tag_code, pos, tag_len, len(content)))
        yield (tag_code, content[pos:pos + tag_len])
        pos += tag_len
43 | ||
44 | ||
class _AVMClass_Object(object):
    """An instance of an AVM class; it only remembers which class it is."""

    def __init__(self, avm_class):
        self.avm_class = avm_class

    def __repr__(self):
        # Class name followed by this object's id in hexadecimal
        class_name = self.avm_class.name
        return '%s#%x' % (class_name, id(self))
51 | ||
52 | ||
0d989011 PH |
class _ScopeDict(dict):
    """A dict that also records the AVM class it belongs to."""

    def __init__(self, avm_class):
        dict.__init__(self)
        self.avm_class = avm_class

    def __repr__(self):
        # Plain dict representation, prefixed with the owning class name
        contents = dict.__repr__(self)
        return '%s__Scope(%s)' % (self.avm_class.name, contents)
62 | ||
63 | ||
54256267 PH |
class _AVMClass(object):
    """Bookkeeping for one class found in the bytecode.

    Attributes:
        name_idx, name: multiname index and resolved name of the class.
        method_names: method name -> method index.
        method_idxs: method index -> method name (inverse of the above).
        methods: method name -> parsed method body.
        method_pyfunctions: method name -> already-built Python callable.
        variables: _ScopeDict holding the class' variables.
    """

    def __init__(self, name_idx, name):
        self.name_idx = name_idx
        self.name = name
        self.method_names = {}
        self.method_idxs = {}
        self.methods = {}
        self.method_pyfunctions = {}

        self.variables = _ScopeDict(self)

    def make_object(self):
        """Return a new _AVMClass_Object referring to this class."""
        return _AVMClass_Object(self)

    def __repr__(self):
        return '_AVMClass(%s)' % self.name

    def register_methods(self, methods):
        """Record a name -> index mapping in both lookup directions."""
        self.method_names.update(methods)
        for method_name, method_idx in methods.items():
            self.method_idxs[method_idx] = method_name
86 | ||
54256267 PH |
87 | |
def _read_int(reader):
    """Decode a variable-length integer from reader.

    At most five bytes are consumed.  Each byte supplies its low 7 bits
    (least significant group first); a byte whose high bit is clear ends
    the sequence.
    """
    value = 0
    for shift in range(0, 35, 7):
        chunk = reader.read(1)
        assert len(chunk) == 1
        (byte,) = struct.unpack('<B', chunk)
        value |= (byte & 0x7f) << shift
        if not byte & 0x80:
            break
    return value
100 | ||
101 | ||
def _u30(reader):
    """Read a variable-length integer and assert bits 28-31 are zero.

    NOTE(review): the name suggests a 30-bit limit, while the mask used
    here covers the top four bits of a 32-bit value -- confirm intent.
    """
    res = _read_int(reader)
    assert res & 0xf0000000 == 0
    return res


# Unsigned 32-bit values share the encoding but need no range check.
# SWFInterpreter.__init__ looks this reader up as ``_u32``; the
# un-prefixed ``u32`` name is kept so existing references keep working.
_u32 = _read_int
u32 = _u32
107 | ||
108 | ||
def _s32(reader):
    """Read a variable-length integer and interpret it as signed 32-bit."""
    raw = _read_int(reader)
    if (raw & 0x80000000) == 0:
        return raw
    # Sign bit set: take the two's complement
    return -((raw ^ 0xffffffff) + 1)
114 | ||
115 | ||
def _s24(reader):
    """Read three bytes as a signed little-endian 24-bit integer."""
    raw = reader.read(3)
    assert len(raw) == 3
    # Sign-extend to four bytes so struct can decode a signed 32-bit int
    is_negative = ord(raw[2:3]) >= 0x80
    padding = b'\xff' if is_negative else b'\x00'
    (value,) = struct.unpack('<i', raw + padding)
    return value
54256267 PH |
121 | |
122 | ||
def _read_string(reader):
    """Read a length-prefixed UTF-8 string and return it decoded.

    The length (in bytes) is read with _u30 and precedes the payload.
    """
    byte_len = _u30(reader)
    raw = reader.read(byte_len)
    assert len(raw) == byte_len
    return raw.decode('utf-8')
128 | ||
129 | ||
def _read_bytes(count, reader):
    """Return exactly count bytes from reader.

    Asserts that count is non-negative and that the reader was able to
    deliver all requested bytes.
    """
    assert count >= 0
    data = reader.read(count)
    assert len(data) == count
    return data
135 | ||
136 | ||
def _read_byte(reader):
    """Read one byte from reader and return it as an unsigned integer."""
    (value,) = struct.unpack('<B', _read_bytes(1, reader=reader))
    return value
141 | ||
142 | ||
class SWFInterpreter(object):
    """Parse the bytecode embedded in an SWF file and run selected methods.

    The constructor takes the raw file bytes, picks the first tag with
    code 82 and reads its constant pool, method signatures, class and
    script traits and method bodies.  Afterwards:

    * ``constant_strings`` and ``multinames`` hold the parsed pools,
    * ``extract_class(name)`` returns the _AVMClass of that name,
    * ``extract_function(avm_class, name)`` returns a Python callable
      that interprets the method's bytecode.

    Only the subset of opcodes handled in ``extract_function`` is
    supported; anything else raises NotImplementedError.
    """

    def __init__(self, file_contents):
        # Raises StopIteration if the file has no tag with code 82
        code_tag = next(tag
                        for tag_code, tag in _extract_tags(file_contents)
                        if tag_code == 82)
        # Skip 4 bytes plus a NUL-terminated string in front of the
        # bytecode (presumably the tag's flags and name -- TODO confirm)
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)

        # Define a couple convenience methods
        u30 = lambda *args: _u30(*args, reader=code_reader)
        s32 = lambda *args: _s32(*args, reader=code_reader)
        # Unsigned 32-bit values use the plain variable-length reader
        u32 = lambda *args: _read_int(*args, reader=code_reader)
        read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
        read_byte = lambda *args: _read_byte(*args, reader=code_reader)

        # minor_version + major_version
        read_bytes(2 + 2)

        # Constant pool
        # Each pool's count includes an implicit entry 0 that is not
        # stored, hence the ranges starting at 1.
        int_count = u30()
        for _c in range(1, int_count):
            s32()
        uint_count = u30()
        for _c in range(1, uint_count):
            u32()
        double_count = u30()
        read_bytes(max(0, (double_count - 1)) * 8)
        string_count = u30()
        self.constant_strings = ['']
        for _c in range(1, string_count):
            s = _read_string(code_reader)
            self.constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
            read_bytes(1)  # kind
            u30()  # name
        ns_set_count = u30()
        for _c in range(1, ns_set_count):
            count = u30()
            for _c2 in range(count):
                u30()
        multiname_count = u30()
        # Number of u30 fields that follow each multiname kind
        MULTINAME_SIZES = {
            0x07: 2,  # QName
            0x0d: 2,  # QNameA
            0x0f: 1,  # RTQName
            0x10: 1,  # RTQNameA
            0x11: 0,  # RTQNameL
            0x12: 0,  # RTQNameLA
            0x09: 2,  # Multiname
            0x0e: 2,  # MultinameA
            0x1b: 1,  # MultinameL
            0x1c: 1,  # MultinameLA
        }
        self.multinames = ['']
        for _c in range(1, multiname_count):
            kind = u30()
            assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
            if kind == 0x07:
                u30()  # namespace_idx
                name_idx = u30()
                self.multinames.append(self.constant_strings[name_idx])
            else:
                # Only QNames are resolved; others get a placeholder name
                self.multinames.append('[MULTINAME kind: %d]' % kind)
                for _c2 in range(MULTINAME_SIZES[kind]):
                    u30()

        # Methods
        method_count = u30()
        MethodInfo = collections.namedtuple(
            'MethodInfo',
            ['NEED_ARGUMENTS', 'NEED_REST'])
        method_infos = []
        for method_id in range(method_count):
            param_count = u30()
            u30()  # return type
            for _ in range(param_count):
                u30()  # param type
            u30()  # name index (always 0 for youtube)
            flags = read_byte()
            if flags & 0x08 != 0:
                # Options present
                option_count = u30()
                for c in range(option_count):
                    u30()  # val
                    read_bytes(1)  # kind
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
                    u30()  # param name
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata
        metadata_count = u30()
        for _c in range(metadata_count):
            u30()  # name
            item_count = u30()
            for _c2 in range(item_count):
                u30()  # key
                u30()  # value

        def parse_traits_info():
            """Consume one trait record; return the methods it declares."""
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[self.multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                # NOTE(review): this maps index -> name, the reverse of
                # the Method/Getter/Setter branch above, although
                # register_methods treats keys as names -- confirm.
                methods[function_idx] = self.multinames[trait_name_idx]
            else:
                raise ExtractorError('Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods

        # Classes
        class_count = u30()
        classes = []
        for class_id in range(class_count):
            name_idx = u30()

            cname = self.multinames[name_idx]
            avm_class = _AVMClass(name_idx, cname)
            classes.append(avm_class)

            u30()  # super_name idx
            flags = read_byte()
            if flags & 0x08 != 0:  # Protected namespace is present
                u30()  # protected_ns_idx
            intrf_count = u30()
            for _c2 in range(intrf_count):
                u30()
            u30()  # iinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                avm_class.register_methods(trait_methods)

        assert len(classes) == class_count
        self._classes_by_name = dict((c.name, c) for c in classes)

        # A second per-class record follows, in the same class order
        for avm_class in classes:
            u30()  # cinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                avm_class.register_methods(trait_methods)

        # Scripts
        script_count = u30()
        for _c in range(script_count):
            u30()  # init
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Method bodies
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        for _c in range(method_body_count):
            method_idx = u30()
            u30()  # max_stack
            local_count = u30()
            u30()  # init_scope_depth
            u30()  # max_scope_depth
            code_length = u30()
            code = read_bytes(code_length)
            # Attach the body to every class that registered this index
            for avm_class in classes:
                if method_idx in avm_class.method_idxs:
                    m = Method(code, local_count)
                    avm_class.methods[avm_class.method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
                u30()  # from
                u30()  # to
                u30()  # target
                u30()  # exc_type
                u30()  # var_name
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # The whole tag must have been consumed
        assert p + code_reader.tell() == len(code_tag)

    def extract_class(self, class_name):
        """Return the _AVMClass called class_name.

        Raises ExtractorError if no such class was parsed.
        """
        try:
            return self._classes_by_name[class_name]
        except KeyError:
            raise ExtractorError('Class %r not found' % class_name)

    def extract_function(self, avm_class, func_name):
        """Return a callable interpreting method func_name of avm_class.

        The callable takes one argument, the list of call arguments, and
        returns the value produced by the method's ``returnvalue``
        opcode.  Built callables are cached in
        ``avm_class.method_pyfunctions``.

        If func_name is not cached but names a parsed class, a new
        instance of that class is returned instead of a callable.

        Raises ExtractorError if the method is unknown.  The returned
        callable raises NotImplementedError on unsupported opcodes or
        properties.
        """
        if func_name in avm_class.method_pyfunctions:
            return avm_class.method_pyfunctions[func_name]
        if func_name in self._classes_by_name:
            return self._classes_by_name[func_name].make_object()
        if func_name not in avm_class.methods:
            raise ExtractorError('Cannot find function %s.%s' % (
                avm_class.name, func_name))
        m = avm_class.methods[func_name]

        def resfunc(args):
            # Helper functions
            coder = io.BytesIO(m.code)
            s24 = lambda: _s24(coder)
            u30 = lambda: _u30(coder)

            # Register 0 is the class scope, followed by the arguments
            # and m.local_count empty slots
            registers = [avm_class.variables] + list(args) + [None] * m.local_count
            stack = []
            scopes = collections.deque([
                self._classes_by_name, avm_class.variables])
            while True:
                opcode = _read_byte(coder)
                if opcode == 17:  # iftrue
                    offset = s24()
                    value = stack.pop()
                    if value:
                        # Jump is relative to the position after the offset
                        coder.seek(coder.tell() + offset)
                elif opcode == 18:  # iffalse
                    offset = s24()
                    value = stack.pop()
                    if not value:
                        coder.seek(coder.tell() + offset)
                elif opcode == 36:  # pushbyte
                    v = _read_byte(coder)
                    stack.append(v)
                elif opcode == 42:  # dup
                    value = stack[-1]
                    stack.append(value)
                elif opcode == 44:  # pushstring
                    idx = u30()
                    stack.append(self.constant_strings[idx])
                elif opcode == 48:  # pushscope
                    new_scope = stack.pop()
                    scopes.append(new_scope)
                elif opcode == 70:  # callproperty
                    index = u30()
                    mname = self.multinames[index]
                    arg_count = u30()
                    # Arguments were pushed left to right
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()

                    if isinstance(obj, _AVMClass_Object):
                        func = self.extract_function(obj.avm_class, mname)
                        res = func(args)
                        stack.append(res)
                        continue
                    elif isinstance(obj, _ScopeDict):
                        if mname in obj.avm_class.method_names:
                            func = self.extract_function(obj.avm_class, mname)
                            res = func(args)
                        else:
                            res = obj[mname]
                        stack.append(res)
                        continue
                    elif isinstance(obj, compat_str):
                        if mname == 'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            if args[0] == '':
                                # Empty separator: split into characters
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                            continue
                    elif isinstance(obj, list):
                        if mname == 'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            res = obj[args[0]:]
                            stack.append(res)
                            continue
                        elif mname == 'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            res = args[0].join(obj)
                            stack.append(res)
                            continue
                    # Reached when no branch above handled the call
                    raise NotImplementedError(
                        'Unsupported property %r on %r'
                        % (mname, obj))
                elif opcode == 72:  # returnvalue
                    res = stack.pop()
                    return res
                elif opcode == 74:  # constructproperty
                    index = u30()
                    arg_count = u30()
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()

                    mname = self.multinames[index]
                    assert isinstance(obj, _AVMClass)
                    # Resolved only for validation (raises if unknown)
                    self.extract_function(obj, mname)
                    # We do not actually call the constructor for now;
                    # we just pretend it does nothing
                    stack.append(obj.make_object())
                elif opcode == 79:  # callpropvoid
                    index = u30()
                    mname = self.multinames[index]
                    arg_count = u30()
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    if mname == 'reverse':
                        assert isinstance(obj, list)
                        obj.reverse()
                    else:
                        raise NotImplementedError(
                            'Unsupported (void) property %r on %r'
                            % (mname, obj))
                elif opcode == 86:  # newarray
                    arg_count = u30()
                    arr = []
                    for i in range(arg_count):
                        arr.append(stack.pop())
                    arr = arr[::-1]
                    stack.append(arr)
                elif opcode == 93:  # findpropstrict
                    index = u30()
                    mname = self.multinames[index]
                    # Search innermost scope first
                    for s in reversed(scopes):
                        if mname in s:
                            res = s
                            break
                    else:
                        res = scopes[0]
                    stack.append(res[mname])
                elif opcode == 94:  # findproperty
                    index = u30()
                    mname = self.multinames[index]
                    for s in reversed(scopes):
                        if mname in s:
                            res = s
                            break
                    else:
                        res = avm_class.variables
                    stack.append(res)
                elif opcode == 96:  # getlex
                    index = u30()
                    mname = self.multinames[index]
                    for s in reversed(scopes):
                        if mname in s:
                            scope = s
                            break
                    else:
                        scope = avm_class.variables
                    # I cannot find where static variables are initialized
                    # so let's just return None
                    res = scope.get(mname)
                    stack.append(res)
                elif opcode == 97:  # setproperty
                    index = u30()
                    value = stack.pop()
                    idx = self.multinames[index]
                    obj = stack.pop()
                    obj[idx] = value
                elif opcode == 98:  # getlocal
                    index = u30()
                    stack.append(registers[index])
                elif opcode == 99:  # setlocal
                    index = u30()
                    value = stack.pop()
                    registers[index] = value
                elif opcode == 102:  # getproperty
                    index = u30()
                    pname = self.multinames[index]
                    if pname == 'length':
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(len(obj))
                    else:  # Assume attribute access
                        idx = stack.pop()
                        assert isinstance(idx, int)
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(obj[idx])
                elif opcode == 115:  # convert_
                    value = stack.pop()
                    intvalue = int(value)
                    stack.append(intvalue)
                elif opcode == 128:  # coerce
                    u30()
                elif opcode == 133:  # coerce_s
                    assert isinstance(stack[-1], (type(None), compat_str))
                elif opcode == 160:  # add
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 + value2
                    stack.append(res)
                elif opcode == 161:  # subtract
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 - value2
                    stack.append(res)
                elif opcode == 164:  # modulo
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 % value2
                    stack.append(res)
                elif opcode == 175:  # greaterequals
                    value2 = stack.pop()
                    value1 = stack.pop()
                    result = value1 >= value2
                    stack.append(result)
                elif opcode == 208:  # getlocal_0
                    stack.append(registers[0])
                elif opcode == 209:  # getlocal_1
                    stack.append(registers[1])
                elif opcode == 210:  # getlocal_2
                    stack.append(registers[2])
                elif opcode == 211:  # getlocal_3
                    stack.append(registers[3])
                elif opcode == 212:  # setlocal_0
                    registers[0] = stack.pop()
                elif opcode == 213:  # setlocal_1
                    registers[1] = stack.pop()
                elif opcode == 214:  # setlocal_2
                    registers[2] = stack.pop()
                elif opcode == 215:  # setlocal_3
                    registers[3] = stack.pop()
                else:
                    raise NotImplementedError(
                        'Unsupported opcode %d' % opcode)

        avm_class.method_pyfunctions[func_name] = resfunc
        return resfunc
596 |