]>
Commit | Line | Data |
---|---|---|
2b25cb5d PH |
1 | from __future__ import unicode_literals |
2 | ||
825abb81 | 3 | import json |
9e3f1991 | 4 | import operator |
2b25cb5d PH |
5 | import re |
6 | ||
7 | from .utils import ( | |
8 | ExtractorError, | |
7769f837 | 9 | remove_quotes, |
2b25cb5d PH |
10 | ) |
11 | ||
9e3f1991 PH |
12 | _OPERATORS = [ |
13 | ('|', operator.or_), | |
14 | ('^', operator.xor), | |
15 | ('&', operator.and_), | |
16 | ('>>', operator.rshift), | |
17 | ('<<', operator.lshift), | |
18 | ('-', operator.sub), | |
19 | ('+', operator.add), | |
20 | ('%', operator.mod), | |
8cfb6efe | 21 | ('/', operator.truediv), |
9e3f1991 PH |
22 | ('*', operator.mul), |
23 | ] | |
24 | _ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS] | |
25 | _ASSIGN_OPERATORS.append(('=', lambda cur, right: right)) | |
26 | ||
27 | _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*' | |
28 | ||
2b25cb5d PH |
29 | |
class JSInterpreter(object):
    """Regex-driven interpreter for a small subset of JavaScript.

    Functions and objects are located in ``code`` with regular expressions
    and turned into Python callables.  Statements are evaluated one at a time
    (split on ``;``); only the constructs handled in interpret_expression
    are supported, anything else raises ExtractorError.
    """

    def __init__(self, code, objects=None):
        """Store the JS source and set up the function/object caches.

        code    -- the JavaScript source text to search for definitions.
        objects -- optional dict of already known objects (name -> dict of
                   member name -> callable); a fresh dict is used when None.
        """
        if objects is None:
            objects = {}
        self.code = code
        self._functions = {}
        self._objects = objects

    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
        """Evaluate a single statement.

        Returns a tuple (value, should_abort) where should_abort is True
        if the statement was a ``return`` and the enclosing function must
        stop executing.  A leading ``var`` keyword is skipped and the rest is
        evaluated as an expression.

        Raises ExtractorError when allow_recursion has dropped below zero.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')

        should_abort = False
        stmt = stmt.lstrip()
        stmt_m = re.match(r'var\s', stmt)
        if stmt_m:
            expr = stmt[len(stmt_m.group(0)):]
        else:
            return_m = re.match(r'return(?:\s+|$)', stmt)
            if return_m:
                expr = stmt[len(return_m.group(0)):]
                should_abort = True
            else:
                # Try interpreting it as an expression
                expr = stmt

        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return v, should_abort

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Evaluate ``expr`` in the scope ``local_vars`` and return its value.

        The forms below are tried in this order; the first that applies wins:
        empty expression, leading parenthesised group, assignment, integer
        literal, plain variable, JSON literal, indexing, member access or
        method call, binary operator, and finally a call of a top-level
        function.  ``local_vars`` is mutated by assignments.

        Raises ExtractorError for unbalanced parentheses and for expressions
        that match none of the supported forms.
        """
        expr = expr.strip()
        if expr == '':  # Empty expression
            return None

        if expr.startswith('('):
            parens_count = 0
            for m in re.finditer(r'[()]', expr):
                if m.group(0) == '(':
                    parens_count += 1
                else:
                    parens_count -= 1
                    if parens_count == 0:
                        sub_expr = expr[1:m.start()]
                        sub_result = self.interpret_expression(
                            sub_expr, local_vars, allow_recursion)
                        remaining_expr = expr[m.end():].strip()
                        if not remaining_expr:
                            return sub_result
                        # Substitute the evaluated group back as a literal
                        # and keep interpreting the rest of the expression.
                        expr = json.dumps(sub_result) + remaining_expr
                        break
            else:
                # The loop ended without the opening paren being closed.
                raise ExtractorError('Premature end of parens in %r' % expr)

        for op, opfunc in _ASSIGN_OPERATORS:
            m = re.match(r'''(?x)
                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?
                \s*%s
                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr)
            if not m:
                continue
            right_val = self.interpret_expression(
                m.group('expr'), local_vars, allow_recursion - 1)

            if m.group('index'):
                # Element assignment: name[index] <op>= value
                lvar = local_vars[m.group('out')]
                idx = self.interpret_expression(
                    m.group('index'), local_vars, allow_recursion)
                assert isinstance(idx, int)
                cur = lvar[idx]
                val = opfunc(cur, right_val)
                lvar[idx] = val
                return val
            else:
                # Variable assignment; an unknown variable starts as None.
                cur = local_vars.get(m.group('out'))
                val = opfunc(cur, right_val)
                local_vars[m.group('out')] = val
                return val

        if expr.isdigit():
            return int(expr)

        var_m = re.match(
            r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE,
            expr)
        if var_m:
            return local_vars[var_m.group('name')]

        try:
            return json.loads(expr)
        except ValueError:
            # Not a JSON literal; fall through to the remaining forms.
            pass

        m = re.match(
            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]

        m = re.match(
            r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,
            expr)
        if m:
            variable = m.group('var')
            member = remove_quotes(m.group('member') or m.group('member2'))
            arg_str = m.group('args')

            if variable in local_vars:
                obj = local_vars[variable]
            else:
                # Not a local: look the object up in the JS source (cached).
                if variable not in self._objects:
                    self._objects[variable] = self.extract_object(variable)
                obj = self._objects[variable]

            if arg_str is None:
                # Member access
                if member == 'length':
                    return len(obj)
                return obj[member]

            assert expr.endswith(')')
            # Function call
            if arg_str == '':
                argvals = tuple()
            else:
                argvals = tuple(
                    self.interpret_expression(v, local_vars, allow_recursion)
                    for v in arg_str.split(','))

            # Built-in string/array methods emulated on Python values.
            if member == 'split':
                assert argvals == ('',)
                return list(obj)
            if member == 'join':
                assert len(argvals) == 1
                return argvals[0].join(obj)
            if member == 'reverse':
                assert len(argvals) == 0
                obj.reverse()
                return obj
            if member == 'slice':
                assert len(argvals) == 1
                return obj[argvals[0]:]
            if member == 'splice':
                assert isinstance(obj, list)
                index, howMany = argvals
                res = []
                # Repeatedly pop at the same index: later elements shift
                # down after every removal.
                for _ in range(index, min(index + howMany, len(obj))):
                    res.append(obj.pop(index))
                return res

            # Otherwise it is a method of an extracted JS object.
            return obj[member](argvals)

        for op, opfunc in _OPERATORS:
            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
            if not m:
                continue
            x, abort = self.interpret_statement(
                m.group('x'), local_vars, allow_recursion - 1)
            if abort:
                raise ExtractorError(
                    'Premature left-side return of %s in %r' % (op, expr))
            y, abort = self.interpret_statement(
                m.group('y'), local_vars, allow_recursion - 1)
            if abort:
                raise ExtractorError(
                    'Premature right-side return of %s in %r' % (op, expr))
            return opfunc(x, y)

        m = re.match(
            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
        if m:
            fname = m.group('func')
            raw_args = m.group('args')
            # Arguments are either integer literals or local variable names.
            argvals = tuple(
                int(v) if v.isdigit() else local_vars[v]
                for v in raw_args.split(',')) if raw_args else tuple()
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            return self._functions[fname](argvals)

        raise ExtractorError('Unsupported JS expression %r' % expr)

    def extract_object(self, objname):
        """Build a dict of callables for the JS object literal ``objname``.

        Searches self.code for ``objname = { key: function(...){...}, ... };``
        and returns a dict mapping each (unquoted) key to a Python callable
        created by build_function.

        Raises ExtractorError if no such object definition is found.
        """
        _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
        obj = {}
        obj_m = re.search(
            r'''(?x)
                (?<!this\.)%s\s*=\s*{\s*
                    (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
                }\s*;
            ''' % (re.escape(objname), _FUNC_NAME_RE),
            self.code)
        if obj_m is None:
            # Fail with the same exception type as extract_function instead
            # of an AttributeError on None.
            raise ExtractorError('Could not find JS object %r' % objname)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
            r'''(?x)
                (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
            ''' % _FUNC_NAME_RE,
            fields)
        for f in fields_m:
            argnames = [name.strip() for name in f.group('args').split(',')]
            obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))

        return obj

    def extract_function(self, funcname):
        """Locate the JS function ``funcname`` and return a Python callable.

        Recognises ``function NAME(...)``, ``NAME = function(...)`` (after
        ``{``, ``;`` or ``,``) and ``var NAME = function(...)``.  The body is
        taken up to the first ``}``.

        Raises ExtractorError if the function cannot be found in self.code.
        """
        func_m = re.search(
            r'''(?x)
                (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
                \((?P<args>[^)]*)\)\s*
                \{(?P<code>[^}]+)\}''' % (
                re.escape(funcname), re.escape(funcname), re.escape(funcname)),
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        # Strip each name: the parameter list may be written "a, b", and an
        # unstripped ' b' would never match the variable name 'b' in the body.
        argnames = [name.strip() for name in func_m.group('args').split(',')]

        return self.build_function(argnames, func_m.group('code'))

    def call_function(self, funcname, *args):
        """Call the JS function ``funcname`` with ``args``; return its result.

        The extracted callable is stored in the same cache that
        interpret_expression uses, so repeated calls do not search the
        source again.
        """
        if funcname not in self._functions:
            self._functions[funcname] = self.extract_function(funcname)
        return self._functions[funcname](args)

    def build_function(self, argnames, code):
        """Return a callable that runs the JS body ``code``.

        The callable takes a single sequence of argument values, binds them
        positionally to ``argnames`` and interprets the ``;``-separated
        statements until one of them is a ``return``.  Its result is the
        value of the last statement that was evaluated.
        """
        def resf(args):
            local_vars = dict(zip(argnames, args))
            for stmt in code.split(';'):
                res, abort = self.interpret_statement(stmt, local_vars)
                if abort:
                    break
            return res
        return resf