]>
Commit | Line | Data |
---|---|---|
2b25cb5d PH |
1 | from __future__ import unicode_literals |
2 | ||
825abb81 | 3 | import json |
2b25cb5d PH |
4 | import re |
5 | ||
6 | from .utils import ( | |
7 | ExtractorError, | |
8 | ) | |
9 | ||
10 | ||
class JSInterpreter(object):
    """Interpreter for a very small subset of JavaScript.

    Functions and objects are located in ``code`` with regular expressions
    and turned into Python callables.  Only the constructs handled
    explicitly below are supported; anything else raises ExtractorError.
    """

    def __init__(self, code):
        """Store the JavaScript source and set up the lookup caches."""
        self.code = code
        # Callables / objects already extracted from ``code``, keyed by
        # their JavaScript name, so each is only parsed once.
        self._functions = {}
        self._objects = {}

    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
        """Execute one statement and return the value it produced.

        Handles ``var x=...``, ``x=...``, ``x[i]=...``, ``return ...`` and
        bare expressions.  Assignments update ``local_vars`` (or the list
        stored in it) in place.

        Raises ExtractorError when ``allow_recursion`` is negative or the
        expression is not supported.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')

        if stmt.startswith('var '):
            stmt = stmt[len('var '):]
        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                         r'=(?P<expr>.*)$', stmt)
        if ass_m:
            if ass_m.groupdict().get('index'):
                # Indexed assignment: a[<index>] = <expr>
                def assign(val):
                    lvar = local_vars[ass_m.group('out')]
                    idx = self.interpret_expression(
                        ass_m.group('index'), local_vars, allow_recursion)
                    assert isinstance(idx, int)
                    lvar[idx] = val
                    return val
                expr = ass_m.group('expr')
            else:
                # Plain assignment: a = <expr>
                def assign(val):
                    local_vars[ass_m.group('out')] = val
                    return val
                expr = ass_m.group('expr')
        elif stmt.startswith('return '):
            assign = lambda v: v
            expr = stmt[len('return '):]
        else:
            # Try interpreting it as an expression
            expr = stmt
            assign = lambda v: v

        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return assign(v)

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Evaluate a single expression and return its Python value.

        Supported forms, tried in this order: integer literals, local
        variable names, JSON literals, ``var.member`` / ``var.member(args)``,
        ``var[index]``, ``a%b`` and ``func(args)``.

        Raises ExtractorError for anything else.
        """
        if expr.isdigit():
            return int(expr)

        if expr.isalpha():
            return local_vars[expr]

        try:
            return json.loads(expr)
        except ValueError:
            pass

        m = re.match(
            r'^(?P<var>[a-z]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$',
            expr)
        if m:
            variable = m.group('var')
            member = m.group('member')
            arg_str = m.group('args')

            if variable in local_vars:
                obj = local_vars[variable]
            else:
                # Not a local: look for an object literal in the source.
                if variable not in self._objects:
                    self._objects[variable] = self.extract_object(variable)
                obj = self._objects[variable]

            if arg_str is None:
                # Member access
                if member == 'length':
                    return len(obj)
                return obj[member]

            assert expr.endswith(')')
            # Function call
            if arg_str == '':
                argvals = tuple()
            else:
                argvals = tuple([
                    self.interpret_expression(v, local_vars, allow_recursion)
                    for v in arg_str.split(',')])

            if member == 'split':
                assert argvals == ('',)
                return list(obj)
            if member == 'join':
                assert len(argvals) == 1
                return argvals[0].join(obj)
            if member == 'reverse':
                assert len(argvals) == 0
                return obj[::-1]
            if member == 'slice':
                assert len(argvals) == 1
                return obj[argvals[0]:]
            if member == 'splice':
                assert isinstance(obj, list)
                index, howMany = argvals
                res = []
                # Every pop shifts the remaining items left, so the next
                # element to remove is always at ``index``.
                for _ in range(index, min(index + howMany, len(obj))):
                    res.append(obj.pop(index))
                return res

            return obj[member](argvals)

        m = re.match(
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]

        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
        if m:
            a = self.interpret_expression(
                m.group('a'), local_vars, allow_recursion)
            b = self.interpret_expression(
                m.group('b'), local_vars, allow_recursion)
            return a % b

        m = re.match(
            r'^(?P<func>[.a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
        if m:
            fname = m.group('func')
            argvals = tuple([
                int(v) if v.isdigit() else local_vars[v]
                for v in m.group('args').split(',')])
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            return self._functions[fname](argvals)
        raise ExtractorError('Unsupported JS expression %r' % expr)

    def extract_object(self, objname):
        """Return a dict mapping member names of ``objname`` to callables.

        Raises ExtractorError if no matching object literal is found in
        the source.
        """
        obj = {}
        obj_m = re.search(
            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
            r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
            r'\}\s*;',
            self.code)
        if obj_m is None:
            # Same error style as extract_function instead of an
            # AttributeError on None.
            raise ExtractorError('Could not find JS object %r' % objname)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
            r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            fields)
        for f in fields_m:
            argnames = f.group('args').split(',')
            obj[f.group('key')] = self.build_function(argnames, f.group('code'))

        return obj

    def extract_function(self, funcname):
        """Find ``funcname`` in the source and return it as a callable.

        Raises ExtractorError if the function definition is not found.
        """
        func_m = re.search(
            (r'(?:function %s|[{;]%s\s*=\s*function)' % (
                re.escape(funcname), re.escape(funcname))) +
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        argnames = func_m.group('args').split(',')

        return self.build_function(argnames, func_m.group('code'))

    def build_function(self, argnames, code):
        """Wrap a function body in a callable taking one sequence of args.

        The returned callable binds ``argnames`` to the given arguments,
        runs each ``;``-separated statement of ``code`` in order and
        returns the value of the last one.
        """
        def resf(args):
            local_vars = dict(zip(argnames, args))
            for stmt in code.split(';'):
                res = self.interpret_statement(stmt, local_vars)
            return res
        return resf