import os
import random
import re
-import socket
-import ssl
import sys
import time
import math
js_to_json,
JSON_LD_RE,
mimetype2ext,
+ network_exceptions,
orderedSet,
parse_bitrate,
parse_codecs,
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
download, lower-case.
- "http", "https", "rtsp", "rtmp", "rtmpe",
+ "http", "https", "rtsp", "rtmp", "rtmp_ffmpeg", "rtmpe",
"m3u8", "m3u8_native" or "http_dash_segments".
* fragment_base_url
Base URL for fragments. Each fragment's path
ie_result = self._real_extract(url)
if self._x_forwarded_for_ip:
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+ subtitles = ie_result.get('subtitles')
+ # params may not contain 'compat_opts'; default to an empty list so the
+ # membership test below never runs against None (which raises TypeError).
+ if (subtitles and 'live_chat' in subtitles
+ and 'no-live-chat' in self._downloader.params.get('compat_opts', [])):
+ del subtitles['live_chat']
return ie_result
except GeoRestrictedError as e:
if self.__maybe_fake_ip_and_retry(e.countries):
url_or_request = update_url_query(url_or_request, query)
if data is not None or headers:
url_or_request = sanitized_Request(url_or_request, data, headers)
- exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
- if hasattr(ssl, 'CertificateError'):
- exceptions.append(ssl.CertificateError)
try:
return self._downloader.urlopen(url_or_request)
- except tuple(exceptions) as err:
+ except network_exceptions as err:
if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
default = ('hidden', 'hasvid', 'ie_pref', 'lang', 'quality',
'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr',
- 'proto', 'ext', 'has_audio', 'source', 'format_id') # These must not be aliases
+ 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases
+ ytdl_default = ('hasaud', 'quality', 'tbr', 'filesize', 'vbr',
+ 'height', 'width', 'proto', 'vext', 'abr', 'aext',
+ 'fps', 'fs_approx', 'source', 'format_id')
settings = {
'vcodec': {'type': 'ordered', 'regex': True,
'url': url,
'ext': determine_ext(url),
}
+ if sub_info['ext'] == 'm3u8':
+ # Per RFC 8216 §3.1, the only possible subtitle format m3u8
+ # files may contain is WebVTT:
+ # <https://tools.ietf.org/html/rfc8216#section-3.1>
+ sub_info['ext'] = 'vtt'
+ sub_info['protocol'] = 'm3u8_native'
subtitles.setdefault(lang, []).append(sub_info)
if media_type not in ('VIDEO', 'AUDIO'):
return
else:
# Assuming direct URL to unfragmented media.
f['url'] = base_url
- formats.append(f)
+ if content_type in ('video', 'audio'):
+ formats.append(f)
+ elif content_type == 'text':
+ subtitles.setdefault(lang or 'und', []).append(f)
else:
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats, subtitles
- def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ def _extract_ism_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self.report_warning(bug_reports_message(
+ "Ignoring subtitle tracks found in the ISM manifest; "
+ "if any subtitle tracks are missing,"
+ ))
+ return fmts
+
+ def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle(
ism_url, video_id,
note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest',
fatal=fatal, data=data, headers=headers, query=query)
if res is False:
- return []
+ return [], {}
ism_doc, urlh = res
if ism_doc is None:
- return []
+ return [], {}
- return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+ return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id)
- def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
"""
Parse formats from ISM manifest.
References:
https://msdn.microsoft.com/en-us/library/ff469518.aspx
"""
if ism_doc.get('IsLive') == 'TRUE':
- return []
+ return [], {}
if (not self._downloader.params.get('allow_unplayable_formats')
and ism_doc.find('Protection') is not None):
- return []
+ return [], {}
duration = int(ism_doc.attrib['Duration'])
timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
formats = []
+ subtitles = {}
for stream in ism_doc.findall('StreamIndex'):
stream_type = stream.get('Type')
- if stream_type not in ('video', 'audio'):
+ if stream_type not in ('video', 'audio', 'text'):
continue
url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name')
+ stream_language = stream.get('Language', 'und')
for track in stream.findall('QualityLevel'):
fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
# TODO: add support for WVC1 and WMAP
- if fourcc not in ('H264', 'AVC1', 'AACL'):
+ if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
self.report_warning('%s is not a supported codec' % fourcc)
continue
tbr = int(track.attrib['Bitrate']) // 1000
format_id.append(stream_name)
format_id.append(compat_str(tbr))
- formats.append({
- 'format_id': '-'.join(format_id),
- 'url': ism_url,
- 'manifest_url': ism_url,
- 'ext': 'ismv' if stream_type == 'video' else 'isma',
- 'width': width,
- 'height': height,
- 'tbr': tbr,
- 'asr': sampling_rate,
- 'vcodec': 'none' if stream_type == 'audio' else fourcc,
- 'acodec': 'none' if stream_type == 'video' else fourcc,
- 'protocol': 'ism',
- 'fragments': fragments,
- '_download_params': {
- 'duration': duration,
- 'timescale': stream_timescale,
- 'width': width or 0,
- 'height': height or 0,
- 'fourcc': fourcc,
- 'codec_private_data': track.get('CodecPrivateData'),
- 'sampling_rate': sampling_rate,
- 'channels': int_or_none(track.get('Channels', 2)),
- 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
- 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
- },
- })
- return formats
+ if stream_type == 'text':
+ subtitles.setdefault(stream_language, []).append({
+ 'ext': 'ismt',
+ 'protocol': 'ism',
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'fragments': fragments,
+ '_download_params': {
+ 'stream_type': stream_type,
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'fourcc': fourcc,
+ 'language': stream_language,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ }
+ })
+ elif stream_type in ('video', 'audio'):
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'ext': 'ismv' if stream_type == 'video' else 'isma',
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'asr': sampling_rate,
+ 'vcodec': 'none' if stream_type == 'audio' else fourcc,
+ 'acodec': 'none' if stream_type == 'video' else fourcc,
+ 'protocol': 'ism',
+ 'fragments': fragments,
+ '_download_params': {
+ 'stream_type': stream_type,
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'width': width or 0,
+ 'height': height or 0,
+ 'fourcc': fourcc,
+ 'language': stream_language,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ 'sampling_rate': sampling_rate,
+ 'channels': int_or_none(track.get('Channels', 2)),
+ 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+ 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+ },
+ })
+ return formats, subtitles
def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None):
def absolute_url(item_url):
entries.append(media_info)
return entries
- def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
+ def _extract_akamai_formats(self, *args, **kwargs):
+ fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs)
+ if subs:
+ self.report_warning(bug_reports_message(
+ "Ignoring subtitle tracks found in the manifests; "
+ "if any subtitle tracks are missing,"
+ ))
+ return fmts
+
+ def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}):
signed = 'hdnea=' in manifest_url
if not signed:
# https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
'', manifest_url).strip('?')
formats = []
+ subtitles = {}
hdcore_sign = 'hdcore=3.7.0'
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
hls_host = hosts.get('hls')
if hls_host:
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
- m3u8_formats = self._extract_m3u8_formats(
+ m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subtitles)
http_host = hosts.get('http')
if http_host and m3u8_formats and not signed:
formats.append(http_f)
i += 1
- return formats
+ return formats, subtitles
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
query = compat_urlparse.urlparse(url).query