]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | import json | |
4 | import operator | |
5 | import re | |
6 | ||
7 | from .utils import ( | |
8 | ExtractorError, | |
9 | ) | |
10 | ||
# Binary operators as (symbol, implementation) pairs.  The order matters to
# consumers that try the entries first to last, so it must not be changed.
_OPERATORS = [
    ('|', operator.or_),
    ('^', operator.xor),
    ('&', operator.and_),
    ('>>', operator.rshift),
    ('<<', operator.lshift),
    ('-', operator.sub),
    ('+', operator.add),
    ('%', operator.mod),
    ('/', operator.truediv),
    ('*', operator.mul),
]

# Compound assignments ("|=", "+=", ...) reuse the binary implementations;
# plain "=" comes last and simply yields the right-hand value.
_ASSIGN_OPERATORS = [
    (symbol + '=', func) for symbol, func in _OPERATORS
] + [('=', lambda cur, right: right)]

# Regular expression fragment matching one JavaScript identifier.
_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
27 | ||
28 | ||
class JSInterpreter(object):
    """Evaluate a small subset of JavaScript using regular expressions.

    Functions and objects are located in the source text by pattern matching
    and executed statement by statement; no syntax tree is built.
    """

    def __init__(self, code, objects=None):
        """Remember the JavaScript source and the optional object cache.

        code -- JavaScript source searched by extract_function() and
                extract_object().
        objects -- optional dict mapping object names to dicts of callables;
                   it is used, and extended, as the object cache.
        """
        if objects is None:
            objects = {}
        self.code = code
        self._functions = {}
        self._objects = objects

    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
        """Evaluate one statement and return ``(value, should_abort)``.

        ``should_abort`` is True when the statement is a ``return``, telling
        the caller to stop executing the enclosing function body.  Raises
        ExtractorError once the recursion budget is exhausted.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')

        should_abort = False
        stmt = stmt.lstrip()
        stmt_m = re.match(r'var\s', stmt)
        if stmt_m:
            # "var x = ..." is evaluated like the bare assignment "x = ...".
            expr = stmt[len(stmt_m.group(0)):]
        else:
            return_m = re.match(r'return(?:\s+|$)', stmt)
            if return_m:
                expr = stmt[len(return_m.group(0)):]
                should_abort = True
            else:
                # Try interpreting it as an expression
                expr = stmt

        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return v, should_abort

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Evaluate a single expression against ``local_vars``.

        The supported forms are tried in a fixed order: a leading
        parenthesised group, assignment, integer literal, variable, JSON
        literal, member access or method call, indexing, binary operator
        and finally a call to a function defined in the source.  Raises
        ExtractorError when none of them applies.
        """
        expr = expr.strip()

        if expr == '':  # Empty expression
            return None

        if expr.startswith('('):
            # Evaluate the leading parenthesised group and splice its result
            # back into the expression as a JSON literal.
            parens_count = 0
            for m in re.finditer(r'[()]', expr):
                if m.group(0) == '(':
                    parens_count += 1
                else:
                    parens_count -= 1
                    if parens_count == 0:
                        sub_expr = expr[1:m.start()]
                        sub_result = self.interpret_expression(
                            sub_expr, local_vars, allow_recursion)
                        remaining_expr = expr[m.end():].strip()
                        if not remaining_expr:
                            return sub_result
                        expr = json.dumps(sub_result) + remaining_expr
                        break
            else:
                raise ExtractorError('Premature end of parens in %r' % expr)

        for op, opfunc in _ASSIGN_OPERATORS:
            m = re.match(r'''(?x)
                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
                \s*%s
                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
            if not m:
                continue
            right_val = self.interpret_expression(
                m.group('expr'), local_vars, allow_recursion - 1)

            if m.groupdict().get('index'):
                # Assignment to an element: name[index] <op>= value
                lvar = local_vars[m.group('out')]
                idx = self.interpret_expression(
                    m.group('index'), local_vars, allow_recursion)
                assert isinstance(idx, int)
                cur = lvar[idx]
                val = opfunc(cur, right_val)
                lvar[idx] = val
                return val
            else:
                # A not yet defined variable is treated as None.
                cur = local_vars.get(m.group('out'))
                val = opfunc(cur, right_val)
                local_vars[m.group('out')] = val
                return val

        if expr.isdigit():
            return int(expr)

        # Only reject the exact keywords; identifiers that merely start with
        # one of them (e.g. "ifc", "returnValue") are ordinary variables.
        var_m = re.match(
            r'(?!(?:if|return|true|false)$)(?P<name>%s)$' % _NAME_RE,
            expr)
        if var_m:
            return local_vars[var_m.group('name')]

        try:
            return json.loads(expr)
        except ValueError:
            pass

        m = re.match(
            r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
            expr)
        if m:
            variable = m.group('var')
            member = m.group('member')
            arg_str = m.group('args')

            if variable in local_vars:
                obj = local_vars[variable]
            else:
                # Not a local: look for an object literal in the source and
                # cache it for later calls.
                if variable not in self._objects:
                    self._objects[variable] = self.extract_object(variable)
                obj = self._objects[variable]

            if arg_str is None:
                # Member access
                if member == 'length':
                    return len(obj)
                return obj[member]

            assert expr.endswith(')')
            # Function call
            if arg_str == '':
                argvals = tuple()
            else:
                argvals = tuple([
                    self.interpret_expression(v, local_vars, allow_recursion)
                    for v in arg_str.split(',')])

            if member == 'split':
                assert argvals == ('',)
                return list(obj)
            if member == 'join':
                assert len(argvals) == 1
                return argvals[0].join(obj)
            if member == 'reverse':
                assert len(argvals) == 0
                obj.reverse()
                return obj
            if member == 'slice':
                assert len(argvals) == 1
                return obj[argvals[0]:]
            if member == 'splice':
                assert isinstance(obj, list)
                index, howMany = argvals
                res = []
                # Always pop at ``index``: the list shrinks on every pop.
                for _ in range(index, min(index + howMany, len(obj))):
                    res.append(obj.pop(index))
                return res

            return obj[member](argvals)

        m = re.match(
            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]

        # The first operator of _OPERATORS found in the expression splits it.
        for op, opfunc in _OPERATORS:
            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
            if not m:
                continue
            x, abort = self.interpret_statement(
                m.group('x'), local_vars, allow_recursion - 1)
            if abort:
                raise ExtractorError(
                    'Premature left-side return of %s in %r' % (op, expr))
            y, abort = self.interpret_statement(
                m.group('y'), local_vars, allow_recursion - 1)
            if abort:
                raise ExtractorError(
                    'Premature right-side return of %s in %r' % (op, expr))
            return opfunc(x, y)

        # Call of a function defined in the source; the argument list may be
        # empty, and each argument is an integer literal or a local variable.
        m = re.match(
            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
        if m:
            fname = m.group('func')
            args_str = m.group('args')
            if args_str:
                argvals = tuple([
                    int(v) if v.isdigit() else local_vars[v]
                    for v in args_str.split(',')])
            else:
                argvals = tuple()
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            return self._functions[fname](argvals)

        raise ExtractorError('Unsupported JS expression %r' % expr)

    def extract_object(self, objname):
        """Build a dict of callables from the object literal named ``objname``.

        Only ``key: function(args){...}`` members are supported.  Raises
        ExtractorError when no matching object literal is found.
        """
        obj = {}
        obj_m = re.search(
            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
            r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
            r'\}\s*;',
            self.code)
        if obj_m is None:
            raise ExtractorError('Could not find JS object %r' % objname)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
            r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            fields)
        for f in fields_m:
            argnames = f.group('args').split(',')
            obj[f.group('key')] = self.build_function(argnames, f.group('code'))

        return obj

    def extract_function(self, funcname):
        """Locate ``funcname`` in the source and return it as a callable.

        The returned callable takes one sequence holding the positional
        arguments.  Raises ExtractorError when the function is not found.
        """
        func_m = re.search(
            r'''(?x)
                (?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
                \((?P<args>[^)]*)\)\s*
                \{(?P<code>[^}]+)\}''' % (
                re.escape(funcname), re.escape(funcname), re.escape(funcname)),
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        # Tolerate whitespace in the parameter list ("a, b") and drop the
        # empty name produced by an empty list.
        argnames = [
            name.strip()
            for name in func_m.group('args').split(',')
            if name.strip()]

        return self.build_function(argnames, func_m.group('code'))

    def call_function(self, funcname, *args):
        """Extract ``funcname`` from the source and call it with ``args``."""
        f = self.extract_function(funcname)
        return f(args)

    def build_function(self, argnames, code):
        """Return a callable running ``code`` with ``argnames`` bound to args.

        The callable takes one sequence of argument values, executes the
        ``;``-separated statements in order and returns the value of the
        last statement executed (the ``return`` statement, if one is hit).
        """
        def resf(args):
            local_vars = dict(zip(argnames, args))
            for stmt in code.split(';'):
                res, abort = self.interpret_statement(stmt, local_vars)
                if abort:
                    break
            return res
        return resf