]>
Commit | Line | Data |
---|---|---|
5cda4eda | 1 | from __future__ import division, unicode_literals |
cf1eb451 | 2 | |
cf1eb451 JMF |
3 | import io |
4 | import itertools | |
cf1eb451 | 5 | import time |
cf1eb451 | 6 | |
ab81ef8f | 7 | from .fragment import FragmentFD |
1cc79574 | 8 | from ..compat import ( |
cf282071 | 9 | compat_b64decode, |
36e6f62c | 10 | compat_etree_fromstring, |
1cc79574 | 11 | compat_urlparse, |
5eaaeb7c | 12 | compat_urllib_error, |
324ac0a2 | 13 | compat_urllib_parse_urlparse, |
edaa23f8 YCH |
14 | compat_struct_pack, |
15 | compat_struct_unpack, | |
1cc79574 | 16 | ) |
cf1eb451 | 17 | from ..utils import ( |
17b786ae | 18 | fix_xml_ampersands, |
b509a4b1 | 19 | xpath_text, |
cf1eb451 JMF |
20 | ) |
21 | ||
22 | ||
1b405bb4 YCH |
class DataTruncatedError(Exception):
    """Signals that fewer bytes were available than a read required."""
25 | ||
26 | ||
cf1eb451 JMF |
class FlvReader(io.BytesIO):
    """
    In-memory reader for the binary boxes found in Flv data.

    The file format is documented in https://www.adobe.com/devnet/f4v.html
    """

    def read_bytes(self, n):
        """Read exactly n bytes; raise DataTruncatedError if fewer remain."""
        chunk = self.read(n)
        available = len(chunk)
        if available < n:
            raise DataTruncatedError(
                'FlvReader error: need %d bytes while only %d bytes got' % (
                    n, available))
        return chunk

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Read 8 bytes and decode them with the '!Q' struct format."""
        (value,) = compat_struct_unpack('!Q', self.read_bytes(8))
        return value

    def read_unsigned_int(self):
        """Read 4 bytes and decode them with the '!I' struct format."""
        (value,) = compat_struct_unpack('!I', self.read_bytes(4))
        return value

    def read_unsigned_char(self):
        """Read 1 byte and decode it with the '!B' struct format."""
        (value,) = compat_struct_unpack('!B', self.read_bytes(1))
        return value

    def read_string(self):
        """
        Read bytes up to the next NUL byte and return them without it.

        The terminator is consumed. DataTruncatedError propagates from
        read_bytes if the data ends before a NUL byte is seen.
        """
        pieces = []
        # iter() with a sentinel stops as soon as the NUL byte is read
        for byte in iter(lambda: self.read_bytes(1), b'\x00'):
            pieces.append(byte)
        return b''.join(pieces)

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)
        """
        box_size = self.read_unsigned_int()
        box_type = self.read_bytes(4)
        if box_size == 1:
            # A 32-bit size of 1 means the real size follows as 64 bits,
            # which makes the header 16 bytes long instead of 8
            box_size = self.read_unsigned_long_long()
            header_len = 16
        else:
            header_len = 8
        box_data = self.read_bytes(box_size - header_len)
        return box_size, box_type, box_data

    def read_asrt(self):
        """Parse an 'asrt' box body into {'segment_run': [(first_segment, fragments_per_segment), ...]}."""
        self.read_unsigned_char()  # version
        self.read_bytes(3)  # flags
        # QualityEntryCount, followed by that many strings (all skipped)
        for _ in range(self.read_unsigned_char()):
            self.read_string()

        run_count = self.read_unsigned_int()
        # Each entry is read as first_segment then fragments_per_segment
        segment_run = [
            (self.read_unsigned_int(), self.read_unsigned_int())
            for _ in range(run_count)]

        return {'segment_run': segment_run}

    def read_afrt(self):
        """Parse an 'afrt' box body into {'fragments': [entry dict, ...]}."""
        self.read_unsigned_char()  # version
        self.read_bytes(3)  # flags
        self.read_unsigned_int()  # time scale

        # QualitySegmentUrlModifiers: a count followed by that many strings
        for _ in range(self.read_unsigned_char()):
            self.read_string()

        entry_count = self.read_unsigned_int()
        entries = []
        for _ in range(entry_count):
            entry = {
                'first': self.read_unsigned_int(),
                'ts': self.read_unsigned_long_long(),
                'duration': self.read_unsigned_int(),
            }
            # The discontinuity byte is only present for zero-duration entries
            if entry['duration'] == 0:
                entry['discontinuity_indicator'] = self.read_unsigned_char()
            else:
                entry['discontinuity_indicator'] = None
            entries.append(entry)

        return {'fragments': entries}

    def read_abst(self):
        """
        Parse an 'abst' box body.

        Returns a dict with the parsed 'segments' (asrt boxes), the parsed
        'fragments' (afrt boxes) and the boolean 'live' flag.
        """
        self.read_unsigned_char()  # version
        self.read_bytes(3)  # flags

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
        packed_flags = self.read_unsigned_char()
        is_live = bool(packed_flags & 0x20)
        self.read_unsigned_int()  # time scale
        self.read_unsigned_long_long()  # CurrentMediaTime
        self.read_unsigned_long_long()  # SmpteTimeCodeOffset

        self.read_string()  # MovieIdentifier
        # ServerEntryTable
        for _ in range(self.read_unsigned_char()):
            self.read_string()
        # QualityEntryTable
        for _ in range(self.read_unsigned_char()):
            self.read_string()
        self.read_string()  # DrmData
        self.read_string()  # MetaData

        segment_tables = []
        for _ in range(self.read_unsigned_char()):
            _size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segment_tables.append(FlvReader(box_data).read_asrt())

        fragment_tables = []
        for _ in range(self.read_unsigned_char()):
            _size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragment_tables.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segment_tables,
            'fragments': fragment_tables,
            'live': is_live,
        }

    def read_bootstrap_info(self):
        """Read the next box, which must be 'abst', and return its parsed content."""
        _size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        abst_reader = FlvReader(box_data)
        return abst_reader.read_abst()
182 | ||
183 | ||
def read_bootstrap_info(bootstrap_bytes):
    """Parse raw bootstrap bytes and return the result of FlvReader.read_bootstrap_info()."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
186 | ||
187 | ||
def build_fragments_list(boot_info):
    """
    Return a list of (segment, fragment) for each fragment in the video.

    Only the first segment run table and the first fragment run table of
    boot_info are used. Fragment numbers count up from the 'first' value
    of the first fragment run entry. For live streams only the last two
    pairs are returned.
    """
    segment_runs = boot_info['segments'][0]['segment_run']
    run_entries = boot_info['fragments'][0]['fragments']
    frag_numbers = itertools.count(run_entries[0]['first'])

    pairs = []
    for seg_number, frag_total in segment_runs:
        # In some live HDS streams (for example Rai), the fragment count is
        # abnormal and causing out-of-memory errors. It's OK to change the
        # number of fragments for live streams as they are updated periodically
        if frag_total == 4294967295 and boot_info['live']:
            frag_total = 2
        pairs.extend(
            (seg_number, next(frag_numbers)) for _ in range(frag_total))

    return pairs[-2:] if boot_info['live'] else pairs
208 | ||
209 | ||
def write_unsigned_int(stream, val):
    """Write val to stream, packed with the '!I' struct format."""
    packed = compat_struct_pack('!I', val)
    stream.write(packed)
2c322cc5 AA |
212 | |
213 | ||
def write_unsigned_int_24(stream, val):
    """Write val to stream as the last three bytes of its '!I' packing."""
    packed = compat_struct_pack('!I', val)
    # Drop the first packed byte so only three bytes are written
    stream.write(packed[1:])
f14f2a6d AA |
216 | |
217 | ||
3b8f3a15 AA |
def write_flv_header(stream):
    """Writes the FLV header to stream"""
    # The FLV header is emitted as four consecutive writes, 13 bytes in total
    header_parts = (
        b'FLV\x01',
        b'\x05',
        b'\x00\x00\x00\x09',
        b'\x00\x00\x00\x00',
    )
    for part in header_parts:
        stream.write(part)
3b8f3a15 AA |
225 | |
226 | ||
def write_metadata_tag(stream, metadata):
    """
    Writes optional metadata tag to stream

    Nothing is written when metadata is empty or None.
    """
    if not metadata:
        return

    SCRIPT_TAG = b'\x12'
    # 1 tag-type byte + 3 length bytes + 7 zero bytes written below
    FLV_TAG_HEADER_LEN = 11
    payload_len = len(metadata)

    stream.write(SCRIPT_TAG)
    write_unsigned_int_24(stream, payload_len)
    # Remaining seven header bytes are left as zero
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Trailing size field covers the header plus the payload
    write_unsigned_int(stream, FLV_TAG_HEADER_LEN + payload_len)
cf1eb451 JMF |
238 | |
239 | ||
def remove_encrypted_media(media):
    """
    Return a list of the elements of media whose attrib mapping has neither
    a 'drmAdditionalHeaderId' nor a 'drmAdditionalHeaderSetId' key.
    """
    drm_attrs = ('drmAdditionalHeaderId', 'drmAdditionalHeaderSetId')
    return [
        element for element in media
        if not any(attr in element.attrib for attr in drm_attrs)]
244 | ||
245 | ||
48107c19 S |
246 | def _add_ns(prop, ver=1): |
247 | return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) | |
248 | ||
249 | ||
def get_base_url(manifest):
    """
    Look up the baseURL text in manifest, trying the f4m 1.0 namespace and
    then the 2.0 namespace.

    A non-empty result is returned stripped of surrounding whitespace;
    otherwise whatever xpath_text yielded is returned unchanged.
    """
    candidates = [_add_ns('baseURL'), _add_ns('baseURL', 2)]
    base_url = xpath_text(manifest, candidates, 'base URL', default=None)
    return base_url.strip() if base_url else base_url
cf1eb451 JMF |
257 | |
258 | ||
class F4mFD(FragmentFD):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    FD_NAME = 'f4m'

    def _get_unencrypted_media(self, doc):
        """
        Return the <media> nodes of the manifest `doc` that carry no DRM
        header reference.

        Errors are reported through self.report_error when there is no
        media at all, when a DRM header node lacks an 'id' attribute, or
        when every media node is DRM-protected.
        """
        media = doc.findall(_add_ns('media'))
        if not media:
            self.report_error('No media found')
        for e in (doc.findall(_add_ns('drmAdditionalHeader'))
                  + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
            # If id attribute is missing it's valid for all media nodes
            # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
            if 'id' not in e.attrib:
                self.report_error('Missing ID in f4m DRM')
        media = remove_encrypted_media(media)
        if not media:
            self.report_error('Unsupported DRM')
        return media

    def _get_bootstrap_from_url(self, bootstrap_url):
        """Fetch `bootstrap_url` and return the parsed bootstrap info."""
        bootstrap = self.ydl.urlopen(bootstrap_url).read()
        return read_bootstrap_info(bootstrap)

    def _update_live_fragments(self, bootstrap_url, latest_fragment):
        """
        Re-fetch the bootstrap info until it lists fragments numbered above
        `latest_fragment`, and return those (segment, fragment) pairs.

        Up to 30 attempts are made, sleeping 5 seconds after each attempt
        that yields nothing new. If all attempts fail, an error is reported
        and the (empty) list is returned.
        """
        fragments_list = []
        retries = 30
        while (not fragments_list) and (retries > 0):
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
            fragments_list = build_fragments_list(boot_info)
            # Keep only fragments we have not processed yet
            fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
            if not fragments_list:
                # Retry after a while
                time.sleep(5.0)
                retries -= 1

        if not fragments_list:
            self.report_error('Failed to update fragments')

        return fragments_list

    def _parse_bootstrap_node(self, node, base_url):
        """
        Return (boot_info, bootstrap_url) for a bootstrapInfo `node`.

        When the node has a 'url' attribute it is resolved against
        `base_url` and fetched; otherwise the node text is base64-decoded
        and parsed, and bootstrap_url is None.
        """
        # Sometimes non empty inline bootstrap info can be specified along
        # with bootstrap url attribute (e.g. dummy inline bootstrap info
        # contains whitespace characters in [1]). We will prefer bootstrap
        # url over inline bootstrap info when present.
        # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
        bootstrap_url = node.get('url')
        if bootstrap_url:
            bootstrap_url = compat_urlparse.urljoin(
                base_url, bootstrap_url)
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
        else:
            bootstrap_url = None
            bootstrap = compat_b64decode(node.text)
            boot_info = read_bootstrap_info(bootstrap)
        return boot_info, bootstrap_url

    def real_download(self, filename, info_dict):
        """
        Download the media described by the f4m manifest at
        info_dict['url'].

        The manifest is fetched and parsed, one media entry is selected by
        bitrate, and each fragment's 'mdat' box payload is appended through
        the fragment-download helpers inherited from FragmentFD. For live
        streams the fragment list is refreshed from the bootstrap URL
        whenever it runs out.

        Returns True once the fragment loop finishes, or False as soon as
        a fragment download reports failure. Non-recoverable HTTP errors
        and truncated fragment data (outside test mode) propagate.
        """
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        # Use the URL reported by the response from here on
        man_url = urlh.geturl()
        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
        # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
        # and https://github.com/ytdl-org/youtube-dl/issues/7823)
        manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()

        doc = compat_etree_fromstring(manifest)
        # (bitrate, media node) pairs; a missing bitrate attribute becomes -1
        formats = [(int(f.attrib.get('bitrate', -1)), f)
                   for f in self._get_unencrypted_media(doc)]
        if requested_bitrate is None or len(formats) == 1:
            # get the best format
            formats = sorted(formats, key=lambda f: f[0])
            rate, media = formats[-1]
        else:
            # First format whose bitrate equals the requested one
            rate, media = list(filter(
                lambda f: int(f[0]) == requested_bitrate, formats))[0]

        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
        man_base_url = get_base_url(doc) or man_url

        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
        boot_info, bootstrap_url = self._parse_bootstrap_node(
            bootstrap_node, man_base_url)
        live = boot_info['live']
        metadata_node = media.find(_add_ns('metadata'))
        if metadata_node is not None:
            metadata = compat_b64decode(metadata_node.text)
        else:
            metadata = None

        fragments_list = build_fragments_list(boot_info)
        test = self.params.get('test', False)
        if test:
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)
        # For some akamai manifests we'll need to add a query to the fragment url
        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

        ctx = {
            'filename': filename,
            'total_frags': total_frags,
            'live': live,
        }

        # NOTE(review): the ctx keys read below ('dest_stream',
        # 'complete_frags_downloaded_bytes', 'fragment_index') are presumably
        # filled in by _prepare_frag_download in FragmentFD - confirm there.
        self._prepare_frag_download(ctx)

        dest_stream = ctx['dest_stream']

        # Only emit the FLV header (and metadata for non-live streams) when
        # no fragment bytes have been downloaded so far
        if ctx['complete_frags_downloaded_bytes'] == 0:
            write_flv_header(dest_stream)
            if not live:
                write_metadata_tag(dest_stream, metadata)

        base_url_parsed = compat_urllib_parse_urlparse(base_url)

        self._start_frag_download(ctx)

        frag_index = 0
        while fragments_list:
            seg_i, frag_i = fragments_list.pop(0)
            frag_index += 1
            # Skip fragments at or below the index already recorded in ctx
            if frag_index <= ctx['fragment_index']:
                continue
            name = 'Seg%d-Frag%d' % (seg_i, frag_i)
            # Query string parts: the base URL's own query, the akamai
            # 'pv-2.0' value and any extra parameter from info_dict
            query = []
            if base_url_parsed.query:
                query.append(base_url_parsed.query)
            if akamai_pv:
                query.append(akamai_pv.strip(';'))
            if info_dict.get('extra_param_to_segment_url'):
                query.append(info_dict['extra_param_to_segment_url'])
            url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
            try:
                success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
                if not success:
                    return False
                reader = FlvReader(down_data)
                # Walk the boxes of the fragment until the 'mdat' box is found
                while True:
                    try:
                        _, box_type, box_data = reader.read_box_info()
                    except DataTruncatedError:
                        if test:
                            # In tests, segments may be truncated, and thus
                            # FlvReader may not be able to parse the whole
                            # chunk. If so, write the segment as is
                            # See https://github.com/ytdl-org/youtube-dl/issues/9214
                            dest_stream.write(down_data)
                            break
                        raise
                    if box_type == b'mdat':
                        self._append_fragment(ctx, box_data)
                        break
            except (compat_urllib_error.HTTPError, ) as err:
                if live and (err.code == 404 or err.code == 410):
                    # We didn't keep up with the live window. Continue
                    # with the next available fragment.
                    msg = 'Fragment %d unavailable' % frag_i
                    self.report_warning(msg)
                    # Emptying the list triggers the live refresh below
                    fragments_list = []
                else:
                    raise

            # Live streams: when the list runs out, ask the bootstrap URL for
            # fragments newer than the one just processed
            if not fragments_list and not test and live and bootstrap_url:
                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
                total_frags += len(fragments_list)
                if fragments_list and (fragments_list[0][1] > frag_i + 1):
                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
                    self.report_warning(msg)

        self._finish_frag_download(ctx)

        return True