]>
Commit | Line | Data |
---|---|---|
54256267 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import collections | |
4 | import io | |
5 | import struct | |
6 | import zlib | |
7 | ||
70f767dc PH |
8 | from .utils import ( |
9 | compat_str, | |
10 | ExtractorError, | |
11 | ) | |
54256267 PH |
12 | |
13 | ||
0cb20563 PH |
def _extract_tags(file_contents):
    """Yield ``(tag_code, tag_data)`` pairs for every tag of an SWF file.

    file_contents -- the complete SWF file as a byte string.  Both
                     zlib-compressed (``CWS``) and uncompressed (``FWS``)
                     files are accepted.

    Raises ExtractorError if the signature is not ``?WS`` or a tag claims
    to extend past the end of the data, and NotImplementedError for any
    other compression scheme.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            'Not an SWF file; header is %r' % file_contents[:3])

    # The first 8 bytes (signature, version, file length) are never
    # compressed; everything after them is the (possibly compressed) body.
    signature = file_contents[:1]
    if signature == b'C':
        content = zlib.decompress(file_contents[8:])
    elif signature == b'F':
        content = file_contents[8:]
    else:
        raise NotImplementedError(
            'Unsupported compression format %r' %
            file_contents[:1])

    # Determine number of bits in framesize rectangle
    framesize_nbits = struct.unpack('!B', content[:1])[0] >> 3
    framesize_len = (5 + 4 * framesize_nbits + 7) // 8

    # Skip the rectangle plus two 2-byte fields that follow it
    pos = framesize_len + 2 + 2
    while pos < len(content):
        header16 = struct.unpack('<H', content[pos:pos + 2])[0]
        pos += 2
        tag_code = header16 >> 6
        tag_len = header16 & 0x3f
        if tag_len == 0x3f:
            # Long tag: the real length follows as a 32-bit integer
            tag_len = struct.unpack('<I', content[pos:pos + 4])[0]
            pos += 4
        if pos + tag_len > len(content):
            # Explicit raise instead of assert so the check survives -O
            raise ExtractorError(
                'Tag %d ends at %d+%d - that\'s longer than the file (%d)'
                % (tag_code, pos, tag_len, len(content)))
        yield (tag_code, content[pos:pos + tag_len])
        pos += tag_len
43 | ||
44 | ||
class _AVMClass_Object(object):
    """An instance of an AVM class; it only remembers which class it is."""

    def __init__(self, avm_class):
        self.avm_class = avm_class

    def __repr__(self):
        # <class name>#<hexadecimal id of this object>
        hex_id = format(id(self), 'x')
        return '%s#%s' % (self.avm_class.name, hex_id)
51 | ||
52 | ||
class _AVMClass(object):
    """A class read from the bytecode: name, method tables and variables."""

    def __init__(self, name_idx, name):
        class ScopeDict(dict):
            """Plain dict whose repr is prefixed with the owning class name."""

            def __init__(self, avm_class):
                dict.__init__(self)
                self.avm_class = avm_class

            def __repr__(self):
                contents = dict.__repr__(self)
                return '%s__Scope(%s)' % (self.avm_class.name, contents)

        self.name_idx, self.name = name_idx, name
        # method name -> method index
        self.method_names = {}
        # method index -> method name
        self.method_idxs = {}
        # filled in by other code with per-name method data
        self.methods = {}
        # cache of Python callables keyed by method name
        self.method_pyfunctions = {}
        self.variables = ScopeDict(self)

    def make_object(self):
        """Return a new object bound to this class."""
        return _AVMClass_Object(self)

    def __repr__(self):
        return '_AVMClass(%s)' % self.name

    def register_methods(self, methods):
        """Record a ``{name: index}`` mapping in both lookup directions."""
        for method_name, method_idx in methods.items():
            self.method_names[method_name] = method_idx
            self.method_idxs[method_idx] = method_name
85 | ||
54256267 PH |
86 | |
def _read_int(reader):
    """Read a variable-length unsigned integer from ``reader``.

    Each byte contributes its low 7 bits, least significant group first;
    a clear high bit ends the number.  At most 5 bytes are consumed.
    Raises AssertionError if the stream ends early.
    """
    value = 0
    for shift in range(0, 35, 7):
        chunk = reader.read(1)
        assert len(chunk) == 1
        (byte,) = struct.unpack('<B', chunk)
        value |= (byte & 0x7f) << shift
        if not byte & 0x80:
            break
    return value
99 | ||
100 | ||
def _u30(reader):
    """Read a variable-length unsigned integer that must fit in 30 bits.

    Raises AssertionError if any bit from bit 30 upwards is set.
    """
    res = _read_int(reader)
    # A 30-bit value may use bits 0-29; everything above must be clear.
    # (_read_int can deliver up to 35 bits from a 5-byte encoding.)
    assert res >> 30 == 0
    return res
# Unsigned 32-bit integers share the variable-length encoding handled by
# _read_int.  ``_u32`` follows the underscore-prefixed naming of the other
# readers in this module; ``u32`` is the original spelling, kept so that
# existing references continue to work.
_u32 = _read_int
u32 = _u32
106 | ||
107 | ||
def _s32(reader):
    """Read a variable-length integer and interpret it as signed 32-bit."""
    # _read_int may return up to 35 bits for a 5-byte encoding; only the
    # low 32 bits belong to the value, so drop the rest before converting.
    v = _read_int(reader) & 0xffffffff
    if v & 0x80000000:
        # Two's complement: values with the sign bit set are negative
        v -= 0x100000000
    return v
113 | ||
114 | ||
def _s24(reader):
    """Read a little-endian signed 24-bit integer from ``reader``.

    Raises AssertionError if fewer than 3 bytes are available.
    """
    raw = reader.read(3)
    assert len(raw) == 3
    # Low 16 bits unsigned, top byte signed: the top byte carries the sign
    low, high = struct.unpack('<Hb', raw)
    return (high << 16) | low
54256267 PH |
120 | |
121 | ||
def _read_string(reader):
    """Read a length-prefixed UTF-8 string from ``reader``.

    The length is read with _u30 and counts bytes, not characters.
    Raises AssertionError if the stream holds fewer bytes than announced.
    """
    byte_len = _u30(reader)
    raw = reader.read(byte_len)
    assert len(raw) == byte_len
    text = raw.decode('utf-8')
    return text
127 | ||
128 | ||
def _read_bytes(count, reader):
    """Read exactly ``count`` bytes from ``reader`` and return them.

    Raises AssertionError for a negative ``count`` or a short read.
    """
    assert count >= 0
    data = reader.read(count)
    assert len(data) == count
    return data
134 | ||
135 | ||
def _read_byte(reader):
    """Read one byte from ``reader`` and return it as an unsigned integer."""
    (value,) = struct.unpack('<B', _read_bytes(1, reader))
    return value
140 | ||
141 | ||
class SWFInterpreter(object):
    """Parse the ABC (AVM2 bytecode) tag of an SWF file and run its methods.

    The constructor reads the tag with code 82, records the string and
    multiname constants, and collects the classes with their method bodies.
    extract_class() and extract_function() then expose those classes and
    turn individual methods into Python callables that interpret a subset
    of AVM2 opcodes.
    """

    def __init__(self, file_contents):
        """Parse ``file_contents`` (a complete SWF file as a byte string).

        Raises ExtractorError if the file has no tag with code 82 or uses
        an unsupported trait kind, and AssertionError if the bytecode
        block is inconsistent.
        """
        # A default is passed to next() so that a missing tag produces a
        # meaningful error instead of a bare StopIteration.
        code_tag = next(
            (tag
             for tag_code, tag in _extract_tags(file_contents)
             if tag_code == 82),
            None)
        if code_tag is None:
            raise ExtractorError(
                'No ABC code tag (tag code 82) found in SWF file')
        # Skip 4 bytes and the NUL-terminated string that follows them
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)

        # Define a couple convenience methods bound to code_reader
        def u30():
            return _u30(code_reader)

        def s32():
            return _s32(code_reader)

        def u32():
            # Unsigned 32-bit values use the plain variable-length encoding
            return _read_int(code_reader)

        def read_bytes(count):
            return _read_bytes(count, code_reader)

        def read_byte():
            return _read_byte(code_reader)

        # minor_version + major_version
        read_bytes(2 + 2)

        # Constant pool
        # Every pool's count includes an implicit entry 0 that is not
        # stored, hence the ranges starting at 1.
        int_count = u30()
        for _c in range(1, int_count):
            s32()
        uint_count = u30()
        for _c in range(1, uint_count):
            u32()
        double_count = u30()
        read_bytes(max(0, (double_count - 1)) * 8)
        string_count = u30()
        self.constant_strings = ['']
        for _c in range(1, string_count):
            s = _read_string(code_reader)
            self.constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
            read_bytes(1)  # kind
            u30()  # name
        ns_set_count = u30()
        for _c in range(1, ns_set_count):
            count = u30()
            for _c2 in range(count):
                u30()
        multiname_count = u30()
        # Number of u30 fields that follow each multiname kind
        MULTINAME_SIZES = {
            0x07: 2,  # QName
            0x0d: 2,  # QNameA
            0x0f: 1,  # RTQName
            0x10: 1,  # RTQNameA
            0x11: 0,  # RTQNameL
            0x12: 0,  # RTQNameLA
            0x09: 2,  # Multiname
            0x0e: 2,  # MultinameA
            0x1b: 1,  # MultinameL
            0x1c: 1,  # MultinameLA
        }
        self.multinames = ['']
        for _c in range(1, multiname_count):
            kind = u30()
            assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
            if kind == 0x07:
                u30()  # namespace_idx
                name_idx = u30()
                self.multinames.append(self.constant_strings[name_idx])
            else:
                # Only QNames are resolved; other kinds get a placeholder
                self.multinames.append('[MULTINAME kind: %d]' % kind)
                for _c2 in range(MULTINAME_SIZES[kind]):
                    u30()

        # Methods
        method_count = u30()
        MethodInfo = collections.namedtuple(
            'MethodInfo',
            ['NEED_ARGUMENTS', 'NEED_REST'])
        method_infos = []
        for method_id in range(method_count):
            param_count = u30()
            u30()  # return type
            for _ in range(param_count):
                u30()  # param type
            u30()  # name index (always 0 for youtube)
            flags = read_byte()
            if flags & 0x08 != 0:
                # Options present
                option_count = u30()
                for c in range(option_count):
                    u30()  # val
                    read_bytes(1)  # kind
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
                    u30()  # param name
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata
        metadata_count = u30()
        for _c in range(metadata_count):
            u30()  # name
            item_count = u30()
            for _c2 in range(item_count):
                u30()  # key
                u30()  # value

        def parse_traits_info():
            """Consume one trait and return ``{name: method index}``.

            The result is empty for traits that do not define a method.
            """
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[self.multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                # Keyed by name like the Method branch above, because
                # register_methods() expects a {name: index} mapping.
                methods[self.multinames[trait_name_idx]] = function_idx
            else:
                raise ExtractorError('Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods

        # Classes
        class_count = u30()
        classes = []
        for class_id in range(class_count):
            name_idx = u30()

            cname = self.multinames[name_idx]
            avm_class = _AVMClass(name_idx, cname)
            classes.append(avm_class)

            u30()  # super_name idx
            flags = read_byte()
            if flags & 0x08 != 0:  # Protected namespace is present
                u30()  # protected_ns_idx
            intrf_count = u30()
            for _c2 in range(intrf_count):
                u30()
            u30()  # iinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                avm_class.register_methods(trait_methods)

        assert len(classes) == class_count
        self._classes_by_name = dict((c.name, c) for c in classes)

        # A second record per class follows, in the same order
        for avm_class in classes:
            u30()  # cinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                avm_class.register_methods(trait_methods)

        # Scripts
        script_count = u30()
        for _c in range(script_count):
            u30()  # init
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Method bodies
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        for _c in range(method_body_count):
            method_idx = u30()
            u30()  # max_stack
            local_count = u30()
            u30()  # init_scope_depth
            u30()  # max_scope_depth
            code_length = u30()
            code = read_bytes(code_length)
            # Attach the body to every class that registered this index
            for avm_class in classes:
                if method_idx in avm_class.method_idxs:
                    m = Method(code, local_count)
                    avm_class.methods[avm_class.method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
                u30()  # from
                u30()  # to
                u30()  # target
                u30()  # exc_type
                u30()  # var_name
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # The whole tag must have been consumed
        assert p + code_reader.tell() == len(code_tag)

    def extract_class(self, class_name):
        """Return the parsed class called ``class_name``.

        Raises ExtractorError if no such class was found in the file.
        """
        try:
            return self._classes_by_name[class_name]
        except KeyError:
            raise ExtractorError('Class %r not found' % class_name)

    def extract_function(self, avm_class, func_name):
        """Return a Python callable that interprets ``avm_class.func_name``.

        The callable takes one argument, the sequence of call arguments.
        Results are cached on the class.  If ``func_name`` is the name of
        a known class, a new object of that class is returned instead.

        Raises ExtractorError if the method cannot be found.  The returned
        callable raises NotImplementedError when it meets an opcode or
        property it does not support.
        """
        if func_name in avm_class.method_pyfunctions:
            return avm_class.method_pyfunctions[func_name]
        if func_name in self._classes_by_name:
            return self._classes_by_name[func_name].make_object()
        if func_name not in avm_class.methods:
            raise ExtractorError('Cannot find function %s.%s' % (
                avm_class.name, func_name))
        m = avm_class.methods[func_name]

        def resfunc(args):
            # Helper functions
            coder = io.BytesIO(m.code)
            s24 = lambda: _s24(coder)
            u30 = lambda: _u30(coder)

            # Register 0 holds the class variables, then come the arguments
            registers = [avm_class.variables] + list(args) + [None] * m.local_count
            stack = []
            scopes = collections.deque([
                self._classes_by_name, avm_class.variables])
            while True:
                opcode = _read_byte(coder)
                if opcode == 17:  # iftrue
                    offset = s24()
                    value = stack.pop()
                    if value:
                        coder.seek(coder.tell() + offset)
                elif opcode == 18:  # iffalse
                    offset = s24()
                    value = stack.pop()
                    if not value:
                        coder.seek(coder.tell() + offset)
                elif opcode == 36:  # pushbyte
                    v = _read_byte(coder)
                    stack.append(v)
                elif opcode == 42:  # dup
                    value = stack[-1]
                    stack.append(value)
                elif opcode == 44:  # pushstring
                    idx = u30()
                    stack.append(self.constant_strings[idx])
                elif opcode == 48:  # pushscope
                    new_scope = stack.pop()
                    scopes.append(new_scope)
                elif opcode == 70:  # callproperty
                    index = u30()
                    mname = self.multinames[index]
                    arg_count = u30()
                    # Arguments were pushed left to right, so popping
                    # yields them in reverse order.
                    call_args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()

                    if isinstance(obj, _AVMClass_Object):
                        func = self.extract_function(obj.avm_class, mname)
                        res = func(call_args)
                        stack.append(res)
                        continue
                    elif isinstance(obj, compat_str):
                        if mname == 'split':
                            assert len(call_args) == 1
                            assert isinstance(call_args[0], compat_str)
                            if call_args[0] == '':
                                # Empty separator: split into characters
                                res = list(obj)
                            else:
                                res = obj.split(call_args[0])
                            stack.append(res)
                            continue
                    elif isinstance(obj, list):
                        if mname == 'slice':
                            assert len(call_args) == 1
                            assert isinstance(call_args[0], int)
                            res = obj[call_args[0]:]
                            stack.append(res)
                            continue
                        elif mname == 'join':
                            assert len(call_args) == 1
                            assert isinstance(call_args[0], compat_str)
                            res = call_args[0].join(obj)
                            stack.append(res)
                            continue
                    raise NotImplementedError(
                        'Unsupported property %r on %r'
                        % (mname, obj))
                elif opcode == 72:  # returnvalue
                    res = stack.pop()
                    return res
                elif opcode == 74:  # constructproperty
                    index = u30()
                    arg_count = u30()
                    call_args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()

                    mname = self.multinames[index]
                    assert isinstance(obj, _AVMClass)
                    # The lookup is kept for its error checking only.
                    # We do not actually call the constructor for now;
                    # we just pretend it does nothing
                    self.extract_function(obj, mname)
                    stack.append(obj.make_object())
                elif opcode == 79:  # callpropvoid
                    index = u30()
                    mname = self.multinames[index]
                    arg_count = u30()
                    call_args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    if mname == 'reverse':
                        assert isinstance(obj, list)
                        obj.reverse()
                    else:
                        raise NotImplementedError(
                            'Unsupported (void) property %r on %r'
                            % (mname, obj))
                elif opcode == 86:  # newarray
                    arg_count = u30()
                    arr = []
                    for i in range(arg_count):
                        arr.append(stack.pop())
                    arr = arr[::-1]
                    stack.append(arr)
                elif opcode == 93:  # findpropstrict
                    index = u30()
                    mname = self.multinames[index]
                    # Innermost scope first; fall back to the outermost
                    for s in reversed(scopes):
                        if mname in s:
                            res = s
                            break
                    else:
                        res = scopes[0]
                    stack.append(res[mname])
                elif opcode == 94:  # findproperty
                    index = u30()
                    mname = self.multinames[index]
                    for s in reversed(scopes):
                        if mname in s:
                            res = s
                            break
                    else:
                        res = avm_class.variables
                    stack.append(res)
                elif opcode == 96:  # getlex
                    index = u30()
                    mname = self.multinames[index]
                    for s in reversed(scopes):
                        if mname in s:
                            scope = s
                            break
                    else:
                        scope = avm_class.variables
                    # I cannot find where static variables are initialized
                    # so let's just return None
                    res = scope.get(mname)
                    stack.append(res)
                elif opcode == 97:  # setproperty
                    index = u30()
                    value = stack.pop()
                    idx = self.multinames[index]
                    obj = stack.pop()
                    obj[idx] = value
                elif opcode == 98:  # getlocal
                    index = u30()
                    stack.append(registers[index])
                elif opcode == 99:  # setlocal
                    index = u30()
                    value = stack.pop()
                    registers[index] = value
                elif opcode == 102:  # getproperty
                    index = u30()
                    pname = self.multinames[index]
                    if pname == 'length':
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(len(obj))
                    else:  # Assume attribute access
                        idx = stack.pop()
                        assert isinstance(idx, int)
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(obj[idx])
                elif opcode == 115:  # convert_
                    value = stack.pop()
                    intvalue = int(value)
                    stack.append(intvalue)
                elif opcode == 128:  # coerce
                    u30()
                elif opcode == 133:  # coerce_s
                    assert isinstance(stack[-1], (type(None), compat_str))
                elif opcode == 160:  # add
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 + value2
                    stack.append(res)
                elif opcode == 161:  # subtract
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 - value2
                    stack.append(res)
                elif opcode == 164:  # modulo
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 % value2
                    stack.append(res)
                elif opcode == 175:  # greaterequals
                    value2 = stack.pop()
                    value1 = stack.pop()
                    result = value1 >= value2
                    stack.append(result)
                elif opcode == 208:  # getlocal_0
                    stack.append(registers[0])
                elif opcode == 209:  # getlocal_1
                    stack.append(registers[1])
                elif opcode == 210:  # getlocal_2
                    stack.append(registers[2])
                elif opcode == 211:  # getlocal_3
                    stack.append(registers[3])
                elif opcode == 212:  # setlocal_0
                    registers[0] = stack.pop()
                elif opcode == 213:  # setlocal_1
                    registers[1] = stack.pop()
                elif opcode == 214:  # setlocal_2
                    registers[2] = stack.pop()
                elif opcode == 215:  # setlocal_3
                    registers[3] = stack.pop()
                else:
                    raise NotImplementedError(
                        'Unsupported opcode %d' % opcode)

        avm_class.method_pyfunctions[func_name] = resfunc
        return resfunc
587 |