]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | import collections | |
4 | import io | |
5 | import zlib | |
6 | ||
7 | from .utils import ( | |
8 | compat_str, | |
9 | ExtractorError, | |
10 | struct_unpack, | |
11 | ) | |
12 | ||
13 | ||
def _extract_tags(file_contents):
    """Yield ``(tag_code, tag_body)`` for every tag in an SWF file.

    file_contents -- the raw bytes of the whole SWF file.

    Both zlib-compressed (``CWS``) and uncompressed (``FWS``) files are
    supported; in either case the payload starts after the 8-byte header.

    Raises ExtractorError if the signature does not end in ``WS`` and
    NotImplementedError for any other first signature byte.  An
    AssertionError is raised when a tag claims to extend past the end of
    the data.
    """
    if file_contents[1:3] != b'WS':
        raise ExtractorError(
            'Not an SWF file; header is %r' % file_contents[:3])
    signature = file_contents[:1]
    if signature == b'C':
        content = zlib.decompress(file_contents[8:])
    elif signature == b'F':
        # Uncompressed file: the payload is stored verbatim after the header
        content = file_contents[8:]
    else:
        raise NotImplementedError(
            'Unsupported compression format %r' %
            file_contents[:1])

    # Determine number of bits in framesize rectangle
    framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3
    # 5 bits for the field width plus 4 fields, rounded up to whole bytes
    framesize_len = (5 + 4 * framesize_nbits + 7) // 8

    # Skip the rectangle and two further 2-byte fields before the first tag
    pos = framesize_len + 2 + 2
    while pos < len(content):
        header16 = struct_unpack('<H', content[pos:pos + 2])[0]
        pos += 2
        # Upper 10 bits: tag code; lower 6 bits: short length
        tag_code = header16 >> 6
        tag_len = header16 & 0x3f
        if tag_len == 0x3f:
            # 0x3f marks the long form: the real length follows as 4 bytes
            tag_len = struct_unpack('<I', content[pos:pos + 4])[0]
            pos += 4
        assert pos + tag_len <= len(content), \
            ('Tag %d ends at %d+%d - that\'s longer than the file (%d)'
                % (tag_code, pos, tag_len, len(content)))
        yield (tag_code, content[pos:pos + tag_len])
        pos += tag_len
43 | ||
44 | ||
class _AVMClass_Object(object):
    """Object created from an AVM class; it only remembers that class."""

    def __init__(self, avm_class):
        # The class description this object was created from
        self.avm_class = avm_class

    def __repr__(self):
        # "<class name>#<object id in hex>"
        class_name = self.avm_class.name
        return '%s#%x' % (class_name, id(self))
51 | ||
52 | ||
class _ScopeDict(dict):
    """Dictionary of variables that knows which AVM class it belongs to."""

    def __init__(self, avm_class):
        super(_ScopeDict, self).__init__()
        # Owner of this scope; its ``name`` is used by __repr__
        self.avm_class = avm_class

    def __repr__(self):
        # Wrap the plain dict representation with the owning class name
        plain_repr = super(_ScopeDict, self).__repr__()
        owner_name = self.avm_class.name
        return '%s__Scope(%s)' % (owner_name, plain_repr)
62 | ||
63 | ||
class _AVMClass(object):
    """Description of one class found in the bytecode."""

    def __init__(self, name_idx, name):
        # Identification of the class
        self.name = name
        self.name_idx = name_idx

        # Lookup tables filled by register_methods(): name -> index and
        # index -> name
        self.method_names = {}
        self.method_idxs = {}
        # name -> method body / name -> cached Python callable
        self.methods = {}
        self.method_pyfunctions = {}

        self.variables = _ScopeDict(self)

    def make_object(self):
        """Return a new object of this class."""
        return _AVMClass_Object(self)

    def __repr__(self):
        return '_AVMClass(%s)' % self.name

    def register_methods(self, methods):
        """Record a ``{name: index}`` mapping in both lookup tables."""
        for method_name, method_idx in methods.items():
            self.method_names[method_name] = method_idx
            self.method_idxs[method_idx] = method_name
86 | ||
87 | ||
class _Multiname(object):
    """Placeholder for a multiname; only its kind is kept."""

    def __init__(self, kind):
        # Numeric kind code as read from the constant pool
        self.kind = kind

    def __repr__(self):
        kind_code = self.kind
        return '[MULTINAME kind: 0x%x]' % kind_code
94 | ||
95 | ||
def _read_int(reader):
    """Read a variable-length unsigned integer from *reader*.

    Up to five bytes are consumed.  Each byte contributes its low 7 bits
    (least significant group first); a clear high bit ends the number.
    An AssertionError is raised if the stream runs out of data.
    """
    value = 0
    for shift in range(0, 5 * 7, 7):
        chunk = reader.read(1)
        assert len(chunk) == 1
        byte, = struct_unpack('<B', chunk)
        value |= (byte & 0x7f) << shift
        if not byte & 0x80:
            # High bit clear: this was the last byte
            break
    return value
108 | ||
109 | ||
def _u30(reader):
    """Read a variable-length integer and check its upper bits are clear.

    Raises AssertionError if any of bits 28-31 of the decoded value is set.
    """
    value = _read_int(reader)
    assert (value & 0xf0000000) == 0
    return value


# A u32 is read exactly like the raw variable-length integer
_u32 = _read_int
115 | ||
116 | ||
def _s32(reader):
    """Read a variable-length integer and interpret bit 31 as the sign."""
    raw = _read_int(reader)
    if raw & 0x80000000 == 0:
        return raw
    # Two's complement: invert the low 32 bits, add one, negate
    return - ((raw ^ 0xffffffff) + 1)
122 | ||
123 | ||
def _s24(reader):
    """Read a signed, little-endian 24-bit integer from *reader*.

    Raises AssertionError if fewer than three bytes are available.
    """
    raw = reader.read(3)
    assert len(raw) == 3
    # Sign-extend to four bytes so the value can be unpacked as an int32
    is_negative = ord(raw[2:3]) >= 0x80
    padding = b'\xff' if is_negative else b'\x00'
    value, = struct_unpack('<i', raw + padding)
    return value
129 | ||
130 | ||
def _read_string(reader):
    """Read a length-prefixed UTF-8 string from *reader*.

    The length is read with ``_u30``; an AssertionError is raised if the
    stream holds fewer bytes than announced.
    """
    length = _u30(reader)
    raw = reader.read(length)
    assert len(raw) == length
    return raw.decode('utf-8')
136 | ||
137 | ||
def _read_bytes(count, reader):
    """Read exactly *count* bytes from *reader* and return them.

    Raises AssertionError if *count* is negative or the stream is too short.
    """
    assert count >= 0
    data = reader.read(count)
    assert len(data) == count
    return data
143 | ||
144 | ||
def _read_byte(reader):
    """Read a single byte from *reader* and return it as an integer."""
    raw = _read_bytes(1, reader=reader)
    return struct_unpack('<B', raw)[0]
149 | ||
150 | ||
class SWFInterpreter(object):
    """Small interpreter for the ActionScript bytecode stored in an SWF file.

    The constructor parses the bytecode container found in the first tag
    with code 82 and records, for every class, the methods it declares and
    the bytecode of their bodies.  ``extract_class`` looks a class up by
    name and ``extract_function`` turns one of its methods into a Python
    callable that emulates the supported subset of opcodes.
    """

    def __init__(self, file_contents):
        """Parse the bytecode contained in *file_contents* (raw SWF bytes).

        Fills ``self.constant_strings``, ``self.multinames`` and
        ``self._classes_by_name``.  Raises StopIteration if the file has no
        tag with code 82, AssertionError on malformed data and
        ExtractorError for unsupported trait kinds.
        """
        code_tag = next(tag
                        for tag_code, tag in _extract_tags(file_contents)
                        if tag_code == 82)
        # The bytecode starts right after the first NUL byte located at
        # offset 4 or later (the tag begins with 4 bytes followed by a
        # NUL-terminated name).
        p = code_tag.index(b'\0', 4) + 1
        code_reader = io.BytesIO(code_tag[p:])

        # Parse ABC (AVM2 ByteCode)

        # Define a couple convenience methods
        u30 = lambda *args: _u30(*args, reader=code_reader)
        s32 = lambda *args: _s32(*args, reader=code_reader)
        u32 = lambda *args: _u32(*args, reader=code_reader)
        read_bytes = lambda *args: _read_bytes(*args, reader=code_reader)
        read_byte = lambda *args: _read_byte(*args, reader=code_reader)

        # minor_version + major_version
        read_bytes(2 + 2)

        # Constant pool
        # Every pool's count includes an implicit entry 0 that is not
        # stored, hence the ranges starting at 1.
        int_count = u30()
        for _c in range(1, int_count):
            s32()
        uint_count = u30()
        for _c in range(1, uint_count):
            u32()
        double_count = u30()
        read_bytes(max(0, (double_count - 1)) * 8)
        string_count = u30()
        self.constant_strings = ['']
        for _c in range(1, string_count):
            s = _read_string(code_reader)
            self.constant_strings.append(s)
        namespace_count = u30()
        for _c in range(1, namespace_count):
            read_bytes(1)  # kind
            u30()  # name
        ns_set_count = u30()
        for _c in range(1, ns_set_count):
            count = u30()
            for _c2 in range(count):
                u30()
        multiname_count = u30()
        # Number of u30 values following the kind of each multiname
        MULTINAME_SIZES = {
            0x07: 2,  # QName
            0x0d: 2,  # QNameA
            0x0f: 1,  # RTQName
            0x10: 1,  # RTQNameA
            0x11: 0,  # RTQNameL
            0x12: 0,  # RTQNameLA
            0x09: 2,  # Multiname
            0x0e: 2,  # MultinameA
            0x1b: 1,  # MultinameL
            0x1c: 1,  # MultinameLA
        }
        self.multinames = ['']
        for _c in range(1, multiname_count):
            kind = u30()
            assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind
            if kind == 0x07:
                # Only QNames are resolved to their string; every other
                # kind is kept as an opaque _Multiname placeholder.
                u30()  # namespace_idx
                name_idx = u30()
                self.multinames.append(self.constant_strings[name_idx])
            else:
                self.multinames.append(_Multiname(kind))
                for _c2 in range(MULTINAME_SIZES[kind]):
                    u30()

        # Methods
        method_count = u30()
        MethodInfo = collections.namedtuple(
            'MethodInfo',
            ['NEED_ARGUMENTS', 'NEED_REST'])
        # Collected while skipping the signatures; not used further in
        # this class.
        method_infos = []
        for method_id in range(method_count):
            param_count = u30()
            u30()  # return type
            for _ in range(param_count):
                u30()  # param type
            u30()  # name index (always 0 for youtube)
            flags = read_byte()
            if flags & 0x08 != 0:
                # Options present
                option_count = u30()
                for c in range(option_count):
                    u30()  # val
                    read_bytes(1)  # kind
            if flags & 0x80 != 0:
                # Param names present
                for _ in range(param_count):
                    u30()  # param name
            mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
            method_infos.append(mi)

        # Metadata
        metadata_count = u30()
        for _c in range(metadata_count):
            u30()  # name
            item_count = u30()
            for _c2 in range(item_count):
                u30()  # key
                u30()  # value

        def parse_traits_info():
            """Consume one trait and return ``{name: method index}``.

            The mapping is empty unless the trait is a method, getter,
            setter or function.
            """
            trait_name_idx = u30()
            kind_full = read_byte()
            kind = kind_full & 0x0f
            attrs = kind_full >> 4
            methods = {}
            if kind in [0x00, 0x06]:  # Slot or Const
                u30()  # Slot id
                u30()  # type_name_idx
                vindex = u30()
                if vindex != 0:
                    read_byte()  # vkind
            elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter
                u30()  # disp_id
                method_idx = u30()
                methods[self.multinames[trait_name_idx]] = method_idx
            elif kind == 0x04:  # Class
                u30()  # slot_id
                u30()  # classi
            elif kind == 0x05:  # Function
                u30()  # slot_id
                function_idx = u30()
                # Map name -> index, like the Method/Getter/Setter branch,
                # because register_methods() expects that orientation.
                methods[self.multinames[trait_name_idx]] = function_idx
            else:
                raise ExtractorError('Unsupported trait kind %d' % kind)

            if attrs & 0x4 != 0:  # Metadata present
                metadata_count = u30()
                for _c3 in range(metadata_count):
                    u30()  # metadata index

            return methods

        # Classes
        class_count = u30()
        classes = []
        for class_id in range(class_count):
            name_idx = u30()

            cname = self.multinames[name_idx]
            avm_class = _AVMClass(name_idx, cname)
            classes.append(avm_class)

            u30()  # super_name idx
            flags = read_byte()
            if flags & 0x08 != 0:  # Protected namespace is present
                u30()  # protected_ns_idx
            intrf_count = u30()
            for _c2 in range(intrf_count):
                u30()
            u30()  # iinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                avm_class.register_methods(trait_methods)

        assert len(classes) == class_count
        self._classes_by_name = dict((c.name, c) for c in classes)

        # A second run of per-class records follows, in the same order
        for avm_class in classes:
            u30()  # cinit
            trait_count = u30()
            for _c2 in range(trait_count):
                trait_methods = parse_traits_info()
                avm_class.register_methods(trait_methods)

        # Scripts
        script_count = u30()
        for _c in range(script_count):
            u30()  # init
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Method bodies
        method_body_count = u30()
        Method = collections.namedtuple('Method', ['code', 'local_count'])
        for _c in range(method_body_count):
            method_idx = u30()
            u30()  # max_stack
            local_count = u30()
            u30()  # init_scope_depth
            u30()  # max_scope_depth
            code_length = u30()
            code = read_bytes(code_length)
            # Attach the body to every class that registered this index
            for avm_class in classes:
                if method_idx in avm_class.method_idxs:
                    m = Method(code, local_count)
                    avm_class.methods[avm_class.method_idxs[method_idx]] = m
            exception_count = u30()
            for _c2 in range(exception_count):
                u30()  # from
                u30()  # to
                u30()  # target
                u30()  # exc_type
                u30()  # var_name
            trait_count = u30()
            for _c2 in range(trait_count):
                parse_traits_info()

        # Everything in the tag must have been consumed
        assert p + code_reader.tell() == len(code_tag)

    def extract_class(self, class_name):
        """Return the class named *class_name*.

        Raises ExtractorError if no such class was parsed.
        """
        try:
            return self._classes_by_name[class_name]
        except KeyError:
            raise ExtractorError('Class %r not found' % class_name)

    def extract_function(self, avm_class, func_name):
        """Return a Python callable emulating *func_name* of *avm_class*.

        The callable takes a single list of arguments.  Callables are
        cached per class.  If *func_name* is the name of a parsed class, a
        new object of that class is returned instead.  Raises
        ExtractorError if the method is unknown; the returned callable
        raises NotImplementedError on unsupported opcodes or properties.
        """
        if func_name in avm_class.method_pyfunctions:
            return avm_class.method_pyfunctions[func_name]
        if func_name in self._classes_by_name:
            return self._classes_by_name[func_name].make_object()
        if func_name not in avm_class.methods:
            raise ExtractorError('Cannot find function %s.%s' % (
                avm_class.name, func_name))
        m = avm_class.methods[func_name]

        def resfunc(args):
            """Run the method's bytecode with *args* and return its result."""
            # Helper functions
            coder = io.BytesIO(m.code)
            s24 = lambda: _s24(coder)
            u30 = lambda: _u30(coder)

            # Register 0 holds the class scope, then the arguments, then
            # the locals.
            registers = [avm_class.variables] + list(args) + [None] * m.local_count
            stack = []
            scopes = collections.deque([
                self._classes_by_name, avm_class.variables])
            while True:
                opcode = _read_byte(coder)
                if opcode == 17:  # iftrue
                    # Offsets are relative to the position after the operand
                    offset = s24()
                    value = stack.pop()
                    if value:
                        coder.seek(coder.tell() + offset)
                elif opcode == 18:  # iffalse
                    offset = s24()
                    value = stack.pop()
                    if not value:
                        coder.seek(coder.tell() + offset)
                elif opcode == 36:  # pushbyte
                    v = _read_byte(coder)
                    stack.append(v)
                elif opcode == 42:  # dup
                    value = stack[-1]
                    stack.append(value)
                elif opcode == 44:  # pushstring
                    idx = u30()
                    stack.append(self.constant_strings[idx])
                elif opcode == 48:  # pushscope
                    new_scope = stack.pop()
                    scopes.append(new_scope)
                elif opcode == 66:  # construct
                    arg_count = u30()
                    # The arguments are popped but not used
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    res = obj.avm_class.make_object()
                    stack.append(res)
                elif opcode == 70:  # callproperty
                    index = u30()
                    mname = self.multinames[index]
                    arg_count = u30()
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()

                    if isinstance(obj, _AVMClass_Object):
                        func = self.extract_function(obj.avm_class, mname)
                        res = func(args)
                        stack.append(res)
                        continue
                    elif isinstance(obj, _ScopeDict):
                        if mname in obj.avm_class.method_names:
                            func = self.extract_function(obj.avm_class, mname)
                            res = func(args)
                        else:
                            res = obj[mname]
                        stack.append(res)
                        continue
                    elif isinstance(obj, compat_str):
                        if mname == 'split':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            if args[0] == '':
                                # Python's split rejects an empty
                                # separator; produce single characters
                                res = list(obj)
                            else:
                                res = obj.split(args[0])
                            stack.append(res)
                            continue
                    elif isinstance(obj, list):
                        if mname == 'slice':
                            assert len(args) == 1
                            assert isinstance(args[0], int)
                            res = obj[args[0]:]
                            stack.append(res)
                            continue
                        elif mname == 'join':
                            assert len(args) == 1
                            assert isinstance(args[0], compat_str)
                            res = args[0].join(obj)
                            stack.append(res)
                            continue
                    # Reached for every combination not handled above
                    raise NotImplementedError(
                        'Unsupported property %r on %r'
                        % (mname, obj))
                elif opcode == 72:  # returnvalue
                    res = stack.pop()
                    return res
                elif opcode == 74:  # constructproperty
                    index = u30()
                    arg_count = u30()
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()

                    mname = self.multinames[index]
                    assert isinstance(obj, _AVMClass)

                    # We do not actually call the constructor for now;
                    # we just pretend it does nothing
                    stack.append(obj.make_object())
                elif opcode == 79:  # callpropvoid
                    index = u30()
                    mname = self.multinames[index]
                    arg_count = u30()
                    args = list(reversed(
                        [stack.pop() for _ in range(arg_count)]))
                    obj = stack.pop()
                    if mname == 'reverse':
                        assert isinstance(obj, list)
                        obj.reverse()
                    else:
                        raise NotImplementedError(
                            'Unsupported (void) property %r on %r'
                            % (mname, obj))
                elif opcode == 86:  # newarray
                    arg_count = u30()
                    arr = []
                    for i in range(arg_count):
                        arr.append(stack.pop())
                    # Popping yields the elements last-first; restore order
                    arr = arr[::-1]
                    stack.append(arr)
                elif opcode == 93:  # findpropstrict
                    index = u30()
                    mname = self.multinames[index]
                    # Search the innermost scope first
                    for s in reversed(scopes):
                        if mname in s:
                            res = s
                            break
                    else:
                        res = scopes[0]
                    stack.append(res[mname])
                elif opcode == 94:  # findproperty
                    index = u30()
                    mname = self.multinames[index]
                    for s in reversed(scopes):
                        if mname in s:
                            res = s
                            break
                    else:
                        res = avm_class.variables
                    stack.append(res)
                elif opcode == 96:  # getlex
                    index = u30()
                    mname = self.multinames[index]
                    for s in reversed(scopes):
                        if mname in s:
                            scope = s
                            break
                    else:
                        scope = avm_class.variables
                    # I cannot find where static variables are initialized
                    # so let's just return None
                    res = scope.get(mname)
                    stack.append(res)
                elif opcode == 97:  # setproperty
                    index = u30()
                    value = stack.pop()
                    idx = self.multinames[index]
                    if isinstance(idx, _Multiname):
                        # Unresolved multiname: the key comes from the stack
                        idx = stack.pop()
                    obj = stack.pop()
                    obj[idx] = value
                elif opcode == 98:  # getlocal
                    index = u30()
                    stack.append(registers[index])
                elif opcode == 99:  # setlocal
                    index = u30()
                    value = stack.pop()
                    registers[index] = value
                elif opcode == 102:  # getproperty
                    index = u30()
                    pname = self.multinames[index]
                    if pname == 'length':
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(len(obj))
                    else:  # Assume attribute access
                        idx = stack.pop()
                        assert isinstance(idx, int)
                        obj = stack.pop()
                        assert isinstance(obj, list)
                        stack.append(obj[idx])
                elif opcode == 115:  # convert_
                    value = stack.pop()
                    intvalue = int(value)
                    stack.append(intvalue)
                elif opcode == 128:  # coerce
                    u30()
                elif opcode == 133:  # coerce_s
                    assert isinstance(stack[-1], (type(None), compat_str))
                elif opcode == 160:  # add
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 + value2
                    stack.append(res)
                elif opcode == 161:  # subtract
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 - value2
                    stack.append(res)
                elif opcode == 164:  # modulo
                    value2 = stack.pop()
                    value1 = stack.pop()
                    res = value1 % value2
                    stack.append(res)
                elif opcode == 175:  # greaterequals
                    value2 = stack.pop()
                    value1 = stack.pop()
                    result = value1 >= value2
                    stack.append(result)
                elif opcode == 208:  # getlocal_0
                    stack.append(registers[0])
                elif opcode == 209:  # getlocal_1
                    stack.append(registers[1])
                elif opcode == 210:  # getlocal_2
                    stack.append(registers[2])
                elif opcode == 211:  # getlocal_3
                    stack.append(registers[3])
                elif opcode == 212:  # setlocal_0
                    registers[0] = stack.pop()
                elif opcode == 213:  # setlocal_1
                    registers[1] = stack.pop()
                elif opcode == 214:  # setlocal_2
                    registers[2] = stack.pop()
                elif opcode == 215:  # setlocal_3
                    registers[3] = stack.pop()
                else:
                    raise NotImplementedError(
                        'Unsupported opcode %d' % opcode)

        # Cache the callable so later lookups return the same object
        avm_class.method_pyfunctions[func_name] = resfunc
        return resfunc
609 |