]>
Commit | Line | Data |
---|---|---|
cf1eb451 JMF |
1 | from __future__ import unicode_literals |
2 | ||
3 | import base64 | |
4 | import io | |
5 | import itertools | |
6 | import os | |
cf1eb451 JMF |
7 | import time |
8 | import xml.etree.ElementTree as etree | |
9 | ||
10 | from .common import FileDownloader | |
11 | from .http import HttpFD | |
1cc79574 PH |
12 | from ..compat import ( |
13 | compat_urlparse, | |
14 | ) | |
cf1eb451 | 15 | from ..utils import ( |
b53466e1 PH |
16 | struct_pack, |
17 | struct_unpack, | |
cf1eb451 JMF |
18 | format_bytes, |
19 | encodeFilename, | |
20 | sanitize_open, | |
b509a4b1 | 21 | xpath_text, |
cf1eb451 JMF |
22 | ) |
23 | ||
24 | ||
class FlvReader(io.BytesIO):
    """
    Reader for Flv files
    The file format is documented in https://www.adobe.com/devnet/f4v.html

    The reader is an in-memory byte stream; every ``read_*`` method consumes
    bytes from the current position and advances it.
    """

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Consume 8 bytes and return them as a big-endian unsigned integer."""
        return struct_unpack('!Q', self.read(8))[0]

    def read_unsigned_int(self):
        """Consume 4 bytes and return them as a big-endian unsigned integer."""
        return struct_unpack('!I', self.read(4))[0]

    def read_unsigned_char(self):
        """Consume 1 byte and return it as an unsigned integer."""
        return struct_unpack('!B', self.read(1))[0]

    def read_string(self):
        """
        Consume bytes up to and including the next NUL byte and return the
        bytes that preceded it (the terminator is not part of the result).

        If the buffer ends before a NUL byte is found, the bytes read so far
        are returned instead of looping forever on the empty reads that
        BytesIO produces at end of data.
        """
        chunks = []
        while True:
            char = self.read(1)
            # read(1) yields b'' once the buffer is exhausted; without this
            # check a truncated string would never terminate the loop.
            if not char or char == b'\x00':
                break
            chunks.append(char)
        return b''.join(chunks)

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)

        A 32-bit size field equal to 1 signals that the real size follows the
        box type as a 64-bit integer, which makes the header 16 bytes long
        instead of 8.
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read(4)
        header_end = 8
        if size == 1:
            real_size = self.read_unsigned_long_long()
            header_end = 16
        # The payload is whatever remains of the box after its header.
        return real_size, box_type, self.read(real_size - header_end)

    def read_asrt(self):
        """
        Parse the payload of an 'asrt' box.

        Returns a dict whose 'segment_run' entry is a list of
        (first_segment, fragments_per_segment) tuples, in file order.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        quality_entry_count = self.read_unsigned_char()
        # QualityEntryCount (the entries themselves are skipped)
        for _ in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        for _ in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """
        Parse the payload of an 'afrt' box.

        Returns a dict whose 'fragments' entry is a list of dicts with the
        keys 'first', 'ts', 'duration' and 'discontinuity_indicator'.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # time scale
        self.read_unsigned_int()

        quality_entry_count = self.read_unsigned_char()
        # QualitySegmentUrlModifiers (skipped)
        for _ in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for _ in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            # The discontinuity byte is only present in entries whose
            # duration is zero.
            if duration == 0:
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """
        Parse the payload of an 'abst' box.

        All leading fields are read only to advance the stream. Returns a
        dict with 'segments' (one read_asrt() result per nested 'asrt' box)
        and 'fragments' (one read_afrt() result per nested 'afrt' box).
        Raises AssertionError if a nested box has an unexpected type.
        """
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
        self.read(1)
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()

        self.read_string()  # MovieIdentifier
        server_count = self.read_unsigned_char()
        # ServerEntryTable
        for _ in range(server_count):
            self.read_string()
        quality_count = self.read_unsigned_char()
        # QualityEntryTable
        for _ in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        for _ in range(segments_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            # Each nested box is parsed by its own reader over its payload.
            segment = FlvReader(box_data).read_asrt()
            segments.append(segment)
        fragments_run_count = self.read_unsigned_char()
        fragments = []
        for _ in range(fragments_run_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
        }

    def read_bootstrap_info(self):
        """
        Read one box, which must be of type 'abst', and return its parsed
        contents (see read_abst). Raises AssertionError for any other type.
        """
        total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
170 | ||
171 | ||
def read_bootstrap_info(bootstrap_bytes):
    """Parse raw bootstrap bytes by wrapping them in an FlvReader."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
174 | ||
175 | ||
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video """
    # Only the first segment run entry is consulted (the original author
    # notes having only found videos with one segment).
    first_run = boot_info['segments'][0]['segment_run'][0]
    fragment_total = first_run[1]
    fragment_entries = boot_info['fragments'][0]['fragments']
    start_number = fragment_entries[0]['first']
    # Fragment numbers are consecutive, starting at the first entry's number;
    # the segment number is always 1.
    return [(1, start_number + offset) for offset in range(fragment_total)]
188 | ||
189 | ||
2c322cc5 AA |
def write_unsigned_int(stream, val):
    """Write ``val`` to ``stream`` as a 4-byte big-endian unsigned integer."""
    packed = struct_pack('!I', val)
    stream.write(packed)
192 | ||
193 | ||
f14f2a6d AA |
def write_unsigned_int_24(stream, val):
    """Write the low 3 bytes of ``val`` to ``stream`` in big-endian order."""
    packed = struct_pack('!I', val)
    # Drop the most significant byte of the 32-bit encoding.
    stream.write(packed[1:])
196 | ||
197 | ||
3b8f3a15 AA |
def write_flv_header(stream):
    """Writes the FLV header to stream"""
    # FLV header, emitted piece by piece in file order. Field meanings below
    # follow the FLV file format specification.
    header_parts = (
        b'FLV\x01',  # signature and version
        b'\x05',  # type flags
        b'\x00\x00\x00\x09',  # header length
        b'\x00\x00\x00\x00',  # size of the (nonexistent) previous tag
    )
    for part in header_parts:
        stream.write(part)
3b8f3a15 AA |
205 | |
206 | ||
def write_metadata_tag(stream, metadata):
    """Writes optional metadata tag to stream

    Nothing is written when ``metadata`` is empty or None.
    """
    if not metadata:
        return

    script_tag = b'\x12'
    flv_tag_header_len = 11
    payload_len = len(metadata)

    stream.write(script_tag)
    write_unsigned_int_24(stream, payload_len)
    # Seven zero bytes fill the rest of the 11-byte tag header.
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Trailing size field: header length plus payload length.
    write_unsigned_int(stream, flv_tag_header_len + payload_len)
cf1eb451 JMF |
218 | |
219 | ||
220 | def _add_ns(prop): | |
221 | return '{http://ns.adobe.com/f4m/1.0}%s' % prop | |
222 | ||
223 | ||
class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose to_screen() discards every message."""

    def to_screen(self, *args, **kargs):
        """Accept any arguments and print nothing."""
        return None
227 | ||
228 | ||
class F4mFD(FileDownloader):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    def real_download(self, filename, info_dict):
        """
        Download the stream described by the f4m manifest at
        info_dict['url'] into ``filename``.

        The manifest is fetched and parsed, one media entry is selected (the
        one whose bitrate equals info_dict['tbr'], or the highest bitrate
        when 'tbr' is absent), and every fragment is downloaded to its own
        temporary file. The 'mdat' payload of each fragment is appended to
        the output after an FLV header and an optional metadata tag.

        Returns True on success and False as soon as one fragment download
        fails. The output stream is closed in both cases.
        """
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
        # Fragments are fetched with a silent HTTP downloader; progress is
        # reported through the hook registered further down.
        http_dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit', None),
                'test': self.params.get('test', False),
            }
        )

        doc = etree.fromstring(manifest)
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
        if requested_bitrate is None:
            # get the best format
            formats = sorted(formats, key=lambda f: f[0])
            _, media = formats[-1]
        else:
            _, media = list(filter(
                lambda f: int(f[0]) == requested_bitrate, formats))[0]

        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
        # The bootstrap info is either referenced by URL or embedded in the
        # manifest as base64 text.
        if bootstrap_node.text is None:
            bootstrap_url = compat_urlparse.urljoin(
                base_url, bootstrap_node.attrib['url'])
            bootstrap = self.ydl.urlopen(bootstrap_url).read()
        else:
            bootstrap = base64.b64decode(bootstrap_node.text)
        metadata_node = media.find(_add_ns('metadata'))
        if metadata_node is not None:
            metadata = base64.b64decode(metadata_node.text)
        else:
            metadata = None
        boot_info = read_bootstrap_info(bootstrap)

        fragments_list = build_fragments_list(boot_info)
        if self.params.get('test', False):
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)
        # For some akamai manifests we'll need to add a query to the fragment url
        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

        tmpfilename = self.temp_name(filename)
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')

        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'downloaded_bytes': 0,
            'frag_counter': 0,
        }
        start = time.time()

        def frag_progress_hook(status):
            frag_total_bytes = status.get('total_bytes', 0)
            # Extrapolate the total size by assuming every remaining fragment
            # is as large as the current one.
            estimated_size = (state['downloaded_bytes'] +
                              (total_frags - state['frag_counter']) * frag_total_bytes)
            if status['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_counter'] += 1
                progress = self.calc_percent(state['frag_counter'], total_frags)
                byte_counter = state['downloaded_bytes']
            else:
                frag_downloaded_bytes = status['downloaded_bytes']
                byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                frag_progress = self.calc_percent(frag_downloaded_bytes,
                                                  frag_total_bytes)
                progress = self.calc_percent(state['frag_counter'], total_frags)
                progress += frag_progress / float(total_frags)

            eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
            self.report_progress(progress, format_bytes(estimated_size),
                                 status.get('speed'), eta)
        http_dl.add_progress_hook(frag_progress_hook)

        frags_filenames = []
        # The try/finally guarantees the output stream is closed even when a
        # fragment download fails or a fragment cannot be parsed.
        try:
            write_flv_header(dest_stream)
            write_metadata_tag(dest_stream, metadata)

            for (seg_i, frag_i) in fragments_list:
                name = 'Seg%d-Frag%d' % (seg_i, frag_i)
                url = base_url + name
                if akamai_pv:
                    url += '?' + akamai_pv.strip(';')
                frag_filename = '%s-%s' % (tmpfilename, name)
                success = http_dl.download(frag_filename, {'url': url})
                if not success:
                    return False
                with open(frag_filename, 'rb') as down:
                    down_data = down.read()
                reader = FlvReader(down_data)
                # Skip boxes until the 'mdat' box, whose payload is the only
                # part of the fragment copied to the output.
                while True:
                    _, box_type, box_data = reader.read_box_info()
                    if box_type == b'mdat':
                        dest_stream.write(box_data)
                        break
                frags_filenames.append(frag_filename)
        finally:
            dest_stream.close()

        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

        self.try_rename(tmpfilename, filename)
        for frag_file in frags_filenames:
            os.remove(frag_file)

        fsize = os.path.getsize(encodeFilename(filename))
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })

        return True