]>
Commit | Line | Data |
---|---|---|
2b25cb5d PH |
1 | from __future__ import unicode_literals |
2 | ||
825abb81 | 3 | import json |
2b25cb5d PH |
4 | import re |
5 | ||
6 | from .utils import ( | |
7 | ExtractorError, | |
8 | ) | |
9 | ||
10 | ||
class JSInterpreter(object):
    """Interpreter for a very small subset of JavaScript.

    Functions and objects are located in ``code`` with regular expressions
    and compiled lazily into Python callables.  Only single-level function
    bodies (no nested braces), lower-case local variable names and a handful
    of string/array methods (``split``, ``join``, ``reverse``, ``slice``,
    ``splice``, ``length``) are understood.  Statements are separated by a
    plain ``';'`` split, so string literals containing ``;`` are not
    supported.
    """

    def __init__(self, code):
        """Store the JavaScript source and create empty lookup caches."""
        self.code = code
        # Caches: name -> callable / name -> dict of callables, filled on
        # first use by extract_function() / extract_object().
        self._functions = {}
        self._objects = {}

    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
        """Execute one statement and return the value it evaluates to.

        Supported forms are ``[var ]name=expr``, ``[var ]name[index]=expr``,
        ``return expr`` and a bare expression.  Assignments mutate
        ``local_vars`` (or the indexed container) and return the assigned
        value.

        Raises ExtractorError when ``allow_recursion`` is negative or the
        expression is not supported.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')

        if stmt.startswith('var '):
            stmt = stmt[len('var '):]
        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                         r'=(?P<expr>.*)$', stmt)
        if ass_m:
            if ass_m.groupdict().get('index'):
                # Indexed assignment: name[index] = expr
                def assign(val):
                    lvar = local_vars[ass_m.group('out')]
                    idx = self.interpret_expression(
                        ass_m.group('index'), local_vars, allow_recursion)
                    assert isinstance(idx, int)
                    lvar[idx] = val
                    return val
                expr = ass_m.group('expr')
            else:
                # Plain assignment: name = expr
                def assign(val):
                    local_vars[ass_m.group('out')] = val
                    return val
                expr = ass_m.group('expr')
        elif stmt.startswith('return '):
            assign = lambda v: v
            expr = stmt[len('return '):]
        else:
            # Try interpreting it as an expression
            expr = stmt
            assign = lambda v: v

        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return assign(v)

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Evaluate a single expression and return its Python value.

        The forms are tried in this order: integer literal, local variable,
        JSON literal, ``obj.member`` / ``obj.method(args)``, ``name[index]``,
        ``a%b`` and finally a call to a top-level function ``func(args)``.

        Raises ExtractorError if none of the forms matches.
        """
        if expr.isdigit():
            return int(expr)

        if expr.isalpha():
            return local_vars[expr]

        # String literals and other JSON-compatible constants
        try:
            return json.loads(expr)
        except ValueError:
            pass

        m = re.match(
            r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
            expr)
        if m:
            variable = m.group('var')
            member = m.group('member')
            arg_str = m.group('args')

            if variable in local_vars:
                obj = local_vars[variable]
            else:
                # Not a local: treat it as a global object defined in the
                # source and extract it on first use.
                if variable not in self._objects:
                    self._objects[variable] = self.extract_object(variable)
                obj = self._objects[variable]

            if arg_str is None:
                # Member access
                if member == 'length':
                    return len(obj)
                return obj[member]

            assert expr.endswith(')')
            # Function call
            if arg_str == '':
                argvals = tuple()
            else:
                argvals = tuple([
                    self.interpret_expression(v, local_vars, allow_recursion)
                    for v in arg_str.split(',')])

            if member == 'split':
                # Only splitting into single characters is supported
                assert argvals == ('',)
                return list(obj)
            if member == 'join':
                assert len(argvals) == 1
                return argvals[0].join(obj)
            if member == 'reverse':
                assert len(argvals) == 0
                # Array.prototype.reverse works in place and returns the array
                obj.reverse()
                return obj
            if member == 'slice':
                # slice(begin) or slice(begin, end); for integer arguments
                # Python slicing has the same semantics as JavaScript's.
                assert len(argvals) in (1, 2)
                if len(argvals) == 1:
                    return obj[argvals[0]:]
                return obj[argvals[0]:argvals[1]]
            if member == 'splice':
                assert isinstance(obj, list)
                index, howMany = argvals
                res = []
                # Removing at the same position repeatedly pops consecutive
                # elements, because the remainder shifts left each time.
                for _ in range(index, min(index + howMany, len(obj))):
                    res.append(obj.pop(index))
                return res

            # User-defined method on an extracted object
            return obj[member](argvals)

        m = re.match(
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]

        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
        if m:
            a = self.interpret_expression(
                m.group('a'), local_vars, allow_recursion)
            b = self.interpret_expression(
                m.group('b'), local_vars, allow_recursion)
            return a % b

        m = re.match(
            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
        if m:
            fname = m.group('func')
            # Arguments may only be integer literals or local variables
            argvals = tuple([
                int(v) if v.isdigit() else local_vars[v]
                for v in m.group('args').split(',')])
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            return self._functions[fname](argvals)
        raise ExtractorError('Unsupported JS expression %r' % expr)

    def extract_object(self, objname):
        """Return a dict mapping method names of ``objname`` to callables.

        Searches the source for ``[var ]objname = {...};``.  Currently only
        function-valued fields are supported.

        Raises ExtractorError if no such object definition is found.
        """
        obj = {}
        obj_m = re.search(
            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
            r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
            r'\}\s*;',
            self.code)
        if obj_m is None:
            # Mirror extract_function() instead of failing with an opaque
            # AttributeError on None.
            raise ExtractorError('Could not find JS object %r' % objname)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
            r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            fields)
        for f in fields_m:
            argnames = f.group('args').split(',')
            obj[f.group('key')] = self.build_function(argnames, f.group('code'))

        return obj

    def extract_function(self, funcname):
        """Return a callable for the top-level function ``funcname``.

        Both ``function name(...){...}`` and ``name = function(...){...}``
        definitions are recognised.  The returned callable takes one
        sequence holding the positional arguments.

        Raises ExtractorError if the function cannot be found.
        """
        func_m = re.search(
            (r'(?:function %s|[{;]%s\s*=\s*function)' % (
                re.escape(funcname), re.escape(funcname))) +
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        argnames = func_m.group('args').split(',')

        return self.build_function(argnames, func_m.group('code'))

    def build_function(self, argnames, code):
        """Wrap a function body in a Python callable.

        The callable takes one sequence of argument values, binds them to
        ``argnames`` and runs the ``;``-separated statements of ``code``.
        It returns the value of the first ``return`` statement, or of the
        last executed statement if there is none (None for an empty body).
        """
        def resf(args):
            local_vars = dict(zip(argnames, args))
            res = None
            for stmt in code.split(';'):
                # Empty statements (e.g. after a trailing ';') are valid JS
                # no-ops; do not hand them to the expression parser.
                if not stmt.strip():
                    continue
                res = self.interpret_statement(stmt, local_vars)
                # A return ends the function; later statements must not run
                # or overwrite the result.
                if stmt.startswith('return '):
                    break
            return res
        return resf