class DummyIE(InfoExtractor):
- pass
+ def _sort_formats(self, formats, field_preference=[]):
+ self._downloader.sort_formats(
+ {'formats': formats, '_format_sort_fields': field_preference})
class TestInfoExtractor(unittest.TestCase):
{'ext': 'mp4', 'height': 460, 'url': TEST_URL},
]
info_dict = _make_result(formats)
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'webm')
{'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
]
info_dict['formats'] = formats
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'mp4')
{'ext': 'flv', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'mp4')
{'ext': 'webm', 'height': 720, 'url': TEST_URL},
]
info_dict['formats'] = formats
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['ext'], 'webm')
def test_format_selection(self):
formats = [
- {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': '35', 'ext': 'mp4', 'preference': 0, 'url': TEST_URL},
{'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
info_dict = _make_result(formats)
ydl = YDL({'format': 'best'})
- ie = YoutubeIE(ydl)
- ie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(copy.deepcopy(info_dict))
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'aac-64')
ydl = YDL({'format': 'mp3'})
- ie = YoutubeIE(ydl)
- ie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(copy.deepcopy(info_dict))
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'mp3-64')
ydl = YDL({'prefer_free_formats': True})
- ie = YoutubeIE(ydl)
- ie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(copy.deepcopy(info_dict))
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'ogg-64')
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': 'bestvideo+bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '248+172')
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '38')
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': 'bestvideo/best,bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['137', '141'])
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['137+141', '248+141'])
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['136+141', '247+141'])
info_dict = _make_result(list(formats_order), extractor='youtube')
ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts]
self.assertEqual(downloaded_ids, ['248+141'])
for f1, f2 in zip(formats_order, formats_order[1:]):
info_dict = _make_result([f1, f2], extractor='youtube')
ydl = YDL({'format': 'best/bestvideo'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1['format_id'])
info_dict = _make_result([f2, f1], extractor='youtube')
ydl = YDL({'format': 'best/bestvideo'})
- yie = YoutubeIE(ydl)
- yie._sort_formats(info_dict['formats'])
+ ydl.sort_formats(info_dict)
ydl.process_ie_result(info_dict)
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], f1['format_id'])
for f in formats:
f['url'] = 'http://_/'
f['ext'] = 'unknown'
- info_dict = _make_result(formats)
+ info_dict = _make_result(formats, _format_sort_fields=('id', ))
ydl = YDL({'format': 'best[filesize<3000]'})
ydl.process_ie_result(info_dict)
'format_id': format_id
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._og_search_title(webpage),
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
if formats:
break
- self._sort_formats(formats)
subtitles = {}
src_vtt = stream.get('captions', {}).get('src-vtt')
'url': mp4_url,
'width': 640,
})
- self._sort_formats(formats)
image = video.get('image') or {}
title = video_data['title']
formats = self._extract_m3u8_formats(
video_data['videoURL'].split('?')[0], video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
**parse_codecs(video.get('codecs', ''))
})
- self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
for f in m3u8_formats:
f['language'] = 'fr'
formats.extend(m3u8_formats)
- self._sort_formats(formats)
video = (self._download_json(
self._API_BASE_URL + 'video/%s' % video_id, video_id,
})
s3_extracted = True
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
'width': int_or_none(source.get('width') or None),
'url': source_src,
})
- self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among
# formats. I just pick the max one
info['subtitles'].setdefault('en', []).append({
'url': asset_url,
})
- self._sort_formats(info['formats'])
return info
else:
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if last_e and not formats:
raise last_e
- self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
}]
if not formats and not self.get_param('ignore_no_formats'):
continue
- self._sort_formats(formats)
file_info = common_entry.copy()
file_info.update({
'id': format_id,
'quality': quality_key(quality_str),
})
- self._sort_formats(formats)
-
station_info = self._download_json(
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
query={'szBjId': broadcaster_id}, fatal=False,
if meta['files'].get('dash'):
formats.extend(self._extract_mpd_formats(base_url + meta['files']['dash'], video_id))
- self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
'acodec': ext,
})
- self._sort_formats(formats)
return {
'id': media_id,
'formats': formats,
})
duration, view_count, timestamp = [None] * 3
- self._sort_formats(formats)
-
return {
'id': video_id,
'display_id': display_id,
self._extract_smil_formats(fmt_url, video_id, fatal=False)
if '/smil:_' in fmt_url
else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False))
- self._sort_formats(formats)
webpage = (url and self._download_webpage(url, video_id, fatal=False)) or ''
thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage))
f['height'] = int('720' if m.group('res') == 'hd' else '480')
formats.extend(video_format)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_title,
media_url = update_url_query(media_url, query)
formats, subtitles = self._extract_theplatform_smil(
media_url, video_id)
- self._sort_formats(formats)
thumbnails = []
thumbnail_urls = [properties.get('imageDesktop')]
'ext': ext,
})
- self._sort_formats(formats)
-
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
return {
raise ExtractorError('no source found for %s' % video_id)
formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4')
if determine_ext(source) == 'm3u8' else ([{'url': source}], {}))
- self._sort_formats(formats)
thumbnails = scale_thumbnails_to_max_format_width(
formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+')
return {
})
formats.append(a_format)
- self._sort_formats(formats)
-
subtitles = {}
for caption in video_data.get('captions', []):
a_caption = {
'height': int_or_none(qs.get('h', [None])[0]),
})
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': format_id,
'height': height,
})
- self._sort_formats(formats)
return {
'id': video_id,
r'(\d+)[pP]', label or '', 'height',
default=None)),
})
- self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, default={})
'height': int_or_none(size_data.get('height')),
'language': version[:2],
})
- self._sort_formats(formats)
entries.append({
'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
'height': int_or_none(format['height']),
})
- self._sort_formats(formats)
-
playlist.append({
'_type': 'video',
'id': video_id,
})
for entry in entries.values():
- self._sort_formats(entry['formats'], ('source', ))
+ entry['_format_sort_fields'] = ('source', )
if len(entries) == 1:
# If there's only one item, use it as the main info dict
'url': s_url,
'quality': -10,
})
- self._sort_formats(formats)
subtitles = {}
for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
'This video is not available due to geoblocking',
countries=self._GEO_COUNTRIES, metadata_available=True)
- self._sort_formats(formats)
-
subtitles = {}
subtitle_url = media_info.get('_subtitleUrl')
if subtitle_url:
'format_id': fid,
'url': furl,
})
- self._sort_formats(formats)
info = {
'formats': formats,
}
continue
f['url'] = format_url
formats.append(f)
- self._sort_formats(formats)
_SUB_FORMATS = (
('./dataTimedText', 'ttml'),
elif mime_type == 'application/vnd.ms-sstr+xml':
formats.extend(self._extract_ism_formats(
href, video_id, ism_id='mss', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
'width': int_or_none(media.get('width')),
'height': int_or_none(media.get('height')),
})
- self._sort_formats(formats)
channel = video.get('channel') or {}
channel_id = channel.get('url')
formats.extend(secondary_formats)
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
metadata = config['data']['attributes']['metadata']
elif src_type == 'application/dash+xml':
formats, subtitles = self._extract_mpd_formats(
src, video_id, mpd_id='dash', fatal=False)
- self._sort_formats(formats)
heartbeat = episode.get('heartbeat') or {}
omniture = episode.get('omniture') or {}
'url': source_url,
'format_id': protocol,
})
- self._sort_formats(formats)
return {
'id': clip_id,
'format_id': 'http-%s' % bitrate,
})
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
} for quality in data['quality']]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': data.get('title'),
'acodec': format_id.split('-')[0],
})
- self._sort_formats(formats)
-
title = '%s - %s' % (artist, track) if artist else track
if not duration:
'ext': ext,
'vcodec': 'none',
})
- self._sort_formats(formats)
title = show.get('audio_title') or 'Bandcamp Weekly'
subtitle = show.get('subtitle')
formats.extend(self._extract_m3u8_formats(
video_info.get('streamUrl'), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', live=True))
- self._sort_formats(formats)
return {
'id': video_id,
else:
programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
- self._sort_formats(formats)
-
return {
'id': programme_id,
'title': title,
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
programme_id, title, description, duration, formats, subtitles = \
self._process_legacy_playlist_url(url, playlist_id)
- self._sort_formats(formats)
return {
'id': programme_id,
'title': title,
duration = int_or_none(items[0].get('duration'))
programme_id = items[0].get('vpid')
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
entries.append({
'id': programme_id,
'title': title,
continue
raise
if entry:
- self._sort_formats(entry['formats'])
entries.append(entry)
if entries:
if programme_id:
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
# digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
digital_data = self._parse_json(
self._search_regex(
if version_id:
title = smp_data['title']
formats, subtitles = self._download_media_selector(version_id)
- self._sort_formats(formats)
image_url = smp_data.get('holdingImageURL')
display_date = init_data.get('displayDate')
topic_title = init_data.get('topicTitle')
continue
title = lead_media.get('title') or self._og_search_title(webpage)
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
description = lead_media.get('summary')
uploader = lead_media.get('masterBrand')
uploader_id = lead_media.get('mid')
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
synopses = current_programme.get('synopses') or {}
network = current_programme.get('network') or {}
duration = int_or_none(
clip_title = clip.get('title')
if clip_vpid and clip_title:
formats, subtitles = self._download_media_selector(clip_vpid)
- self._sort_formats(formats)
return {
'id': clip_vpid,
'title': clip_title,
if not programme_id:
continue
formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
entries.append({
'id': programme_id,
'title': playlist_title,
if not (item_id and item_title):
continue
formats, subtitles = self._download_media_selector(item_id)
- self._sort_formats(formats)
item_desc = None
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
if blocks:
formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
if not formats and not self.get_param('ignore_no_formats'):
continue
- self._sort_formats(formats)
video_id = media_meta.get('externalId')
if not video_id:
fmt['abr'] = 96
fmt['asr'] = 44100
formats.append(fmt)
- self._sort_formats(formats)
images = []
for name, info in track['images'].items():
f['height'] = height
formats.extend(current_formats)
- self._sort_formats(formats)
-
return {
'id': video_id,
'display_id': first_fact.get('id'),
'url': decode_url(file_url),
})
- self._sort_formats(formats)
-
description = self._html_search_meta('description', webpage)
return {
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
f'you have to login or become premium member to download them. {self._login_hint()}')
- self._sort_formats(formats)
return formats
def json2srt(self, json_data):
'filesize': aud.get('size'),
})
- self._sort_formats(formats)
return formats
def _extract_video_info(self, video_data, *, ep_id=None, aid=None):
})
for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
formats.extend(self._parse_formats(qn, fmt))
- self._sort_formats(formats)
return {
'id': room_id,
'height': int_or_none(height),
'ext': ext,
})
- self._sort_formats(formats)
thumbnails = []
for k, v in item.items():
self.raise_no_formats(
'Video is unavailable. Please make sure this video is playable in the browser '
'before reporting this issue.', expected=True, video_id=video_id)
- self._sort_formats(formats)
return {
'id': video_id,
formats = self._extract_m3u8_formats(
channel['data']['url'], username,
'mp4')
- self._sort_formats(formats)
return {
'id': username,
else:
formats.extend(self._extract_f4m_formats(
stream_url, video_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
'quality': int(quality.attrib['value']),
} for quality in info_xml.findall('./video/quality')]
- self._sort_formats(formats)
-
return formats
formats = self._extract_m3u8_formats(
'%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
channel_id, 'mp4', m3u8_id='hls', live=True)
- self._sort_formats(formats)
return {
'id': channel_id,
'height': video_data.get('resolution'),
'preference': -10,
}))
- self._sort_formats(formats)
return {
'id': video_id,
'url': update_url_query(authenticated_download_url, query),
})
- self._sort_formats(formats)
-
creator = f.get('created_by') or {}
return {
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
'format_id': 'rtmp-%s' % asset_type,
})
formats.append(rtmp_format_info)
- self._sort_formats(formats)
return formats
def _extract_thumbnails(self, variants, base_url):
'tbr': tbr,
'filesize': int_or_none(node.get('fileSize')),
})
- self._sort_formats(formats)
subtitles = {}
for edge in clip.get('captionFiles', {}).get('edges', []):
'format_id': 'http-%d' % bitrate if bitrate else 'http',
'tbr': bitrate,
})
- self._sort_formats(formats)
title = self._search_regex(
(r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage = self._download_webpage(url, video_id)
formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
'title': self._generic_title('', webpage),
self.raise_no_formats(
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
- self._sort_formats(formats)
-
for f in formats:
f.setdefault('http_headers', {}).update(headers)
'thumbnail': ep.get('imageThumbnail'),
'duration': parse_duration(ep.get('length')),
})
- self._sort_formats(formats)
return merge_dicts(info, {
'id': video_id,
'url': f['url']
} for f in info['rfiles']
]
- self._sort_formats(formats)
return {
'id': info['vid'],
video_url = self._og_search_video_url(webpage, secure=False)
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
title = episode.get('title') or self._generic_title('', webpage)
url = episode['m3u8']
formats = self._extract_m3u8_formats(url, display_id, ext='ts')
- self._sort_formats(formats)
show = traverse_obj(episode, ('show', 'title'))
show_id = traverse_obj(episode, ('show', 'id'))
video_stream = self._search_regex(r'videoStreamURL\s*=\s*"([^"]+)"', global_vars, 'Video Stream URL', fatal=False)
formats = self._extract_m3u8_formats(video_stream, video_id, 'ts', live=True)
- self._sort_formats(formats)
return {
'id': video_id,
m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL')
formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
- self._sort_formats(formats)
return {
'id': channel_id,
else:
continue
formats.append(f)
- self._sort_formats(formats)
return {
'id': user_id,
if not formats:
self.raise_no_formats('No active streams found', expected=True)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._html_extract_title(webpage),
dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
formats.extend(dash_frmts)
subtitles = self._merge_subtitles(subtitles, dash_subs)
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title').strip(),
else:
info = self._parse_html5_media_entries(url, webpage, url)[0]
- self._sort_formats(info['formats'])
-
info.update({
'id': video_id,
'title': title,
'format_id': format_id,
'quality': preference(format_id),
})
- self._sort_formats(formats)
thumbnails = [{
'id': image_id,
'format_id': format_type,
'url': format_url,
})
- self._sort_formats(formats)
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
'height': int_or_none(f.get('height')),
'format_id': format_field(f, 'height', '%sp'),
} for f in video['qualities'] if f.get('fn')]
- self._sort_formats(formats)
thumbnail = video.get('splash')
duration = float_or_none(try_get(
if 'descriptive' in format['format_id'].lower():
format['preference'] = -2
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_info['title'],
subtitles = self._merge_subtitles(subtitles, tp_subtitles)
if last_e and not formats:
self.raise_no_formats(last_e, True, content_id)
- self._sort_formats(formats)
extra_info.update({
'id': content_id,
})
formats = self._extract_akamai_formats(video_info['url'], display_id)
- self._sort_formats(formats)
return {
'id': display_id,
formats = self._extract_m3u8_formats(
metadata['files'][0]['url'], video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
- self._sort_formats(formats)
image = video.get('image')
thumbnails = None
'language': language,
'vcodec': vcodec,
})
- self._sort_formats(formats)
return {
'id': event_id,
'url': media_url,
'vcodec': 'none' if media_type == 'audio' else None,
})
- self._sort_formats(formats)
informacio = media['informacio']
title = informacio['titol']
hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
-
uploader = data.get('editer_name')
description = self._html_search_meta(
'description', webpage, default=None)
'filesize': quality.get('length'),
} for quality in meta['qualities'] if quality.get('file')]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': meta.get('title'),
extract_format(webpage, resolution)
- self._sort_formats(formats)
-
return merge_dicts(info_dict, info)
f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0]
formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id)
- self._sort_formats(formats)
return {
'id': str(json_data['videoId']),
'title': json_data.get('name') or self._og_search_title(webpage),
'is_live': is_live,
})
- for e in entries:
- self._sort_formats(e['formats'])
-
if len(entries) == 1:
return entries[0]
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
if not formats and not slides and not zip_file:
self.raise_no_formats(
'None of recording, slides or zip are available for %s' % content_path)
- self._sort_formats(formats)
subtitles = {}
for caption in content_data.get('Captions', []):
info_dict = self._parse_html5_media_entries(
self._PLAYER_BASE % video_id, webpage, video_id,
m3u8_entry_protocol='m3u8_native')[0]
-
- self._sort_formats(info_dict['formats'])
self._remove_duplicate_formats(info_dict['formats'])
info_dict.update({
# ffmpeg skips segments for fast m3u8
preference=-10 if m3u8_id == 'fast' else None,
m3u8_id=m3u8_id, fatal=False, live=True))
- self._sort_formats(formats)
return {
'id': video_id,
'url': base_url + '/apipublic' + media_data['path'],
'quality': 10,
})
- self._sort_formats(formats)
timestamp = str_to_int(post_data.get('created_at'))
if timestamp:
timestamp = int_or_none(timestamp, 1000)
'format_id': 'backup',
'url': backup_url,
})
- self._sort_formats(formats)
return {
'id': video_id,
'vcodec': 'none',
'acodec': 'mp3',
})
- self._sort_formats(formats)
return {
'id': video_id,
'height': int_or_none(height),
'tbr': int_or_none(f.get('br')),
})
- self._sort_formats(formats)
thumbnail = self._search_regex(
r"var\s+mov_thumb\s*=\s*'([^']+)';",
'm3u8_native', m3u8_id='hls', fatal=False)
formats.extend(self._extract_mpd_formats(
manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
'url': src['src'],
'quality': quality_order(src['streamQuality']),
} for src in sources]
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': format_id,
'vcodec': 'none',
})
- self._sort_formats(formats)
title = metadata['Title']
description = metadata.get('Description')
return FormatSort
def _sort_formats(self, formats, field_preference=[]):
- if formats and field_preference:
+ if not field_preference:
+ self._downloader.deprecation_warning(
+ 'yt_dlp.InfoExtractor._sort_formats is deprecated and is no longer required')
+ return
+ self._downloader.deprecation_warning(
+ 'yt_dlp.InfoExtractor._sort_formats is deprecated and no longer works as expected. '
+ 'Return _format_sort_fields in the info_dict instead')
+ if formats:
formats[0]['__sort_fields'] = field_preference
def _check_formats(self, formats, video_id):
'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
})
- self._sort_formats(formats)
entries.append({
'id': playlist_id,
'url': formats[0]['url'],
})
else:
- self._sort_formats(formats)
entry['formats'] = formats
entries.append(entry)
if len(entries) == 1:
'ext': ext,
'quality': 1 if quality == 'high' else 0,
})
- self._sort_formats(formats)
subtitles = {}
for t, caption in video_info.get('captions', {}).items():
'url': media_mp4_url,
})
- self._sort_formats(formats)
-
subtitles = {}
captions = m_details.get('captions') or {}
for caption_url in captions.values():
smil, smil_url, video_id, namespace))
if not formats and video.get('drm'):
self.report_drm(video_id)
- self._sort_formats(formats)
subtitles = {}
for track in video.get('tracks', []):
'source_preference': preference_key(MOBILE),
})
- self._sort_formats(formats)
-
thumbnail = coub.get('picture')
duration = float_or_none(coub.get('duration'))
timestamp = parse_iso8601(coub.get('published_at') or coub.get('created_at'))
else:
fmt['language_preference'] = -10
- self._sort_formats(formats)
-
category = str_or_none(content['details']['category_%s_t' % (url_lang, )])
def is_live(v_type):
})
if not formats and has_drm:
self.report_drm(video_id)
- self._sort_formats(formats)
description = media.get('Description')
duration = int_or_none(media.get(
'format_id': item['type'],
'quality': quality(item['type']),
} for item in manifest['flavors'] if item['mime'].startswith('video/')]
- self._sort_formats(formats)
return {
'url': url,
'width': int_or_none(image.get('width')),
} for image in video_json.get('thumbnails') or [] if image.get('url')]
- self._sort_formats(formats)
return {
'id': id,
'title': video_json.get('title'),
f['language'] = stream_response.get('audio_locale')
f['quality'] = hardsub_preference(hardsub_lang.lower())
formats.extend(adaptive_formats)
- self._sort_formats(formats)
return {
'id': internal_id,
path, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
add_referer(formats)
- self._sort_formats(formats)
entries.append({
'id': '%s_%d' % (video_id, partnum + 1),
'title': (
'format_id': 'http',
})
formats.append(fmt)
- self._sort_formats(formats)
title = media['title']
'height': int_or_none(height),
'ext': ext,
})
- self._sort_formats(formats)
return {
'id': video_id,
'height': int_or_none(height),
'ext': ext,
})
- self._sort_formats(formats)
thumbnails = []
for k, v in item.items():
'protocol': protocol,
'ext': 'mp4' if is_hls else None,
})
- self._sort_formats(formats)
return {
'id': video_id,
f['url'] = f['url'].split('#')[0]
if not f.get('fps') and f['format_id'].endswith('@60'):
f['fps'] = 60
- self._sort_formats(formats)
subtitles = {}
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
format_, subs_ = self._extract_m3u8_formats_and_subtitles(url, slug)
formats.extend(format_)
self._merge_subtitles(subs_, target=subtitles)
- self._sort_formats(formats)
return {
'id': episode_info['id'],
'display_id': slug,
if not m3u8_url:
raise ExtractorError('Failed to obtain m3u8 URL')
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
video_id, 'mp4', fatal=False, headers={'Referer': src_iframe})
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
return {
'id': video_id,
'preference': -100, # Only the first 30 seconds
'ext': 'mp3',
}]
- self._sort_formats(formats)
artists = ', '.join(
orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
entries.append({
'preference': -100, # Only the first 30 seconds
'ext': 'mp3',
}]
- self._sort_formats(formats)
artists = ', '.join(
orderedSet(a.get('ART_NAME') for a in s.get('ARTISTS')))
entries.append({
'vcodec': 'none' if key == 'audio' else None,
})
- self._sort_formats(formats)
-
default_lang = 'en'
subtitles = {}
}
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id)
- self._sort_formats(formats)
json_ld_data = self._search_json_ld(webpage, display_id, default={})
yield merge_dicts(json_ld_data, extra_info_dict, {
formats.extend(self._extract_m3u8_formats(
manifest_url, display_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
m3u8_url = traverse_obj(
stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
- self._sort_formats(formats)
yield {
'id': video_id,
'format_id': source.get('label'),
})
- self._sort_formats(formats)
-
title = deliver_info['title']
thumbnail = jwconf.get('image')
duration = int_or_none(deliver_info.get('duration'))
elif stream_kind == 'hds':
formats.extend(self._extract_f4m_formats(
stream_url, display_id, f4m_id=stream_kind, fatal=False))
- self._sort_formats(formats)
video_id = video.get('id') or display_id
description = video.get('description', {}).get('detailed')
self.raise_no_formats(
'%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']),
expected=True)
- self._sort_formats(formats)
subtitles = {}
for caption in video_data.get('captions', []):
video_formats = self._parse_mp4(metadata)
if video_formats is None:
video_formats = self._parse_flv(metadata)
- self._sort_formats(video_formats)
return {
'id': video_id,
title = broadcast['title']
formats = self._extract_m3u8_formats(
broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
return {
'id': vod_id,
'title': title,
formats = self._extract_m3u8_formats(
'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username,
display_name, 'mp4')
- self._sort_formats(formats)
return {
'id': display_name,
'title': title,
'url': format_url,
'format_id': format_id,
})
- self._sort_formats(formats)
creator = series = None
tags = []
info = self._parse_html5_media_entries(
url, webpage, display_id, m3u8_id='hls',
m3u8_entry_protocol='m3u8_native')[0]
- self._sort_formats(info['formats'])
asset = self._parse_json(
self._search_regex(
video_url = re.sub(r'[?&]dl=0', '', url)
video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
formats.append({'url': video_url, 'format_id': 'original', 'format_note': 'Original', 'quality': 1})
- self._sort_formats(formats)
return {
'id': video_id,
'quality': 2 if format_id == 'hq' else 1,
'url': video_url
})
- self._sort_formats(formats)
duration = int_or_none(video_data.get('duration')) or parse_duration(
video_data.get('duration_format'))
'Unfortunately, DR is not allowed to show this program outside Denmark.',
countries=self._GEO_COUNTRIES)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
formats.extend(self._extract_f4m_formats(update_url_query(
'%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
channel_id, f4m_id=link_type, fatal=False))
- self._sort_formats(formats)
return {
'id': channel_id,
'format_id': version,
'quality': quality(version),
})
- self._sort_formats(formats)
thumbnails = []
stills = item.get('stills') or {}
'format_id': join_nonempty('http', ext, label),
'height': int_or_none(height),
})
- self._sort_formats(formats)
return {
'id': data.get('mediaid') or video_id,
transform_source=lambda s: s.replace(
'rtmp://tv-od.dw.de/flash/',
'http://tv-download.dw.de/dwtv_video/flv/'))
- self._sort_formats(formats)
upload_date = hidden_inputs.get('display_date')
if not upload_date:
f['url'] = format_url
formats.append(f)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
formats.append({
'url': format_url,
})
- self._sort_formats(formats)
return {
'id': lesson_id,
'url': mp4_url,
})
- self._sort_formats(formats)
-
description = get_elements_by_class('synopsis', webpage)[0]
thumbnail = self._html_search_regex(
r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
'%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'),
video_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': media.get('NAME_ES') or media.get('name') or media['NAME_EU'],
entry_protocol='m3u8_native', m3u8_id='hls')
duration = int_or_none(entry.get('duration'))
break
- self._sort_formats(formats)
def get_insight(kind):
return int_or_none(try_get(
else:
formats, subtitles = [], {}
self.raise_no_formats(f'Unknown streaming format {ext}')
- self._sort_formats(formats)
return {
'id': video_id,
description = self._og_search_description(webpage) or None
thumbnail = self._og_search_thumbnail(webpage) or None
formats = self._extract_m3u8_formats(data_json['url']['video_url'], id)
- self._sort_formats(formats)
subtitles = {}
for subtitle in data_json.get('subtitles', []):
'height': height,
'fps': fps,
})
- self._sort_formats(formats)
json_ld = self._search_json_ld(webpage, display_id, default={})
},
]
- def _extract_formats_and_subs(self, video_id, allow_none=True):
+ def _extract_formats_and_subs(self, video_id):
media_info = self._call_api(video_id, codename=video_id)
formats, subs = [], {}
for media_file in try_get(media_info, lambda x: x['MediaFiles'], list) or []:
formats.extend(formats_)
self._merge_subtitles(subs_, target=subs)
- if formats or not allow_none:
- self._sort_formats(formats)
return formats, subs
def _real_extract(self, url):
formats, subs = self._extract_m3u8_formats_and_subtitles(
f'https://mediastream.ert.gr/vodedge/_definst_/mp4:dvrorigin/{video_id}/playlist.m3u8',
video_id, 'mp4')
- self._sort_formats(formats)
thumbnail_id = parse_qs(url).get('bgimg', [None])[0]
if thumbnail_id and not thumbnail_id.startswith('http'):
thumbnail_id = f'https://program.ert.gr{thumbnail_id}'
'format_id': '%s-%sp' % (determine_ext(video['src']), video['res']),
'height': int_or_none(video.get('res')),
} for video in data['files']['videos']]
- self._sort_formats(formats)
return {
'id': video_id,
links = clip.get('links', {})
traverse_source(links.get('source', {}))
traverse_source(links.get('mobile', {}))
- self._sort_formats(formats)
description = clip.get('caption') or clip.get('description')
thumbnail = clip.get('thumbnail')
'url': item['url'],
'vcodec': 'none',
})
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title'),
m3u8_url, headers = asset['stream'], {}
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
'height': int(height),
'filesize_approx': parse_filesize(filesize),
})
- self._sort_formats(formats)
title = self._html_search_meta('title', webpage, 'title')
description = self._html_search_meta(
'format_note': xpath_text(file_, './lglabel'),
'language_preference': language_preference(lang)
})
- self._sort_formats(formats)
return {
'id': video_id,
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
- self._sort_formats(formats)
-
return {
'id': json_data['id'],
'title': json_ld_data.get('title') or self._og_search_title(webpage),
formats = [{
'url': source['src'],
} for source in video_json.get('sources', [])]
- self._sort_formats(formats)
return {
'id': id,
r'filename=.*\.([a-z0-9_A-Z]+)&', media_url,
'file extension', default=None) or fcfg.get('type'),
})
- self._sort_formats(formats)
title = self._og_search_title(webpage)
description = self._og_search_description(webpage)
formats = [{
'url': stream,
}]
- self._sort_formats(formats)
title = info.get('titleRaw') or data['title']
description = info.get('descriptionRaw')
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
- def process_formats(formats):
+ def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around
# with non-browser User-Agent.
- for f in formats:
+ for f in info['formats']:
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
-
- self._sort_formats(formats, ('res', 'quality'))
+ info['_format_sort_fields'] = ('res', 'quality')
def extract_relay_data(_filter):
return self._parse_json(self._search_regex(
'url': playable_url,
})
extract_dash_manifest(video, formats)
- process_formats(formats)
v_id = video.get('videoId') or video.get('id') or video_id
info = {
'id': v_id,
'timestamp': int_or_none(video.get('publish_time')),
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
}
+ process_formats(info)
description = try_get(video, lambda x: x['savable_description']['text'])
title = video.get('name')
if title:
if subtitles_src:
subtitles.setdefault('en', []).append({'url': subtitles_src})
- process_formats(formats)
-
info_dict = {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
}
+ process_formats(info_dict)
info_dict.update(extract_metadata(webpage))
return info_dict
'tbr': tbr or int(mobj.group(3)),
})
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
'Referer': url,
}))
- self._sort_formats(formats)
for fmt in formats:
fmt.update({
'protocol': 'fc2_live',
'height': int_or_none(q.get('label')),
} for q in msi_data['qualities'] if q.get('url')]
- self._sort_formats(formats)
-
tags = [tag['label'] for tag in msi_data.get('tags', []) if tag.get('label')]
return {
})
formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id)
- self._sort_formats(formats)
return {
'id': video_id,
'protocol': 'm3u8_native',
} for source in data['sources']]
- self._sort_formats(formats)
-
subtitles = {}
if data.get('subtitle'):
'quality': QUALITY(stream.get('quality')),
'protocol': 'm3u8_native',
})
- self._sort_formats(formats)
thumbnails = []
poster = response.get('poster', {})
'ext': 'mp4',
'quality': QUALITY(quality),
})
- self._sort_formats(formats)
thumbnails = []
for name, width, height in self._THUMBNAIL_RES:
% (path, m3u8_path),
display_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
duration = int_or_none(item.get('duration') or self._html_search_meta(
'url': stream['_content'],
'quality': preference(stream_type),
})
- self._sort_formats(formats)
owner = video_info.get('owner', {})
uploader_id = owner.get('nsid')
'url': xpath_text(n, './url', fatal=True),
'tbr': int_or_none(n.attrib['bitrate']),
} for n in doc.findall('.//streams/stream')]
- self._sort_formats(formats)
return {
'id': video_id,
'resolution': format + 'p',
'quality': int(format),
} for format in sources]
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False)
formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
data = try_get(
video, lambda x: x['trackingData']['properties'], dict) or {}
} for source, resolution in zip(
video_data['sources'], video_data.get('resolutions', itertools.repeat(None)))]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
info = self._download_json(
self.get_api_with_st_token(video_id, int(slug_episode) - 1 if slug_episode else 0), video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
'title': join_nonempty(title, real_episode, delim=' - '),
} for sheet in spritesheets]
})
- self._sort_formats(formats)
-
if subtitle:
title += ' - %s' % subtitle
title = title.strip()
'format_note': channels,
'quality': quality,
} for quality, format_url in enumerate(audio_urls)]
- self._sort_formats(formats)
return {
'id': audio_id,
video_id, video_url = api_response['displayMeta']['contentID'], api_response['displayMeta']['streamURLVideo']
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
for episode in episodes:
video_id = str(episode['contentID'])
formats, subtitles = self._extract_m3u8_formats_and_subtitles(episode['streamURL'], video_id, 'mp4')
- self._sort_formats(formats)
yield {
'id': video_id,
'format_id': format_id,
})
formats.append(f)
- self._sort_formats(formats)
subtitles = {
'en': [{
self._BITRATE_MAP.get(f.get('tbr'), ()))))
formats.extend(fmt)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats, ['tbr'])
return {
'id': video_id,
'formats': formats,
'subtitles': subtitles,
'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg',
+ '_format_sort_fields': ('tbr', )
}
self.raise_no_formats(
'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id)
self._remove_duplicate_formats(formats)
- self._sort_formats(formats, ('lang', 'source'))
return {
'id': episode_id,
'formats': formats,
'thumbnails': thumbnails,
'subtitles': subtitles,
+ '_format_sort_fields': ('lang', 'source'),
}
def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name):
'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
})
if formats:
- self._sort_formats(formats)
info['formats'] = formats
else:
info.update({
else:
frmt['height'] = str_to_int(resolution.replace('p', ''))
formats.append(frmt)
- self._sort_formats(formats)
return {
'id': id,
} for url, f in ((media.get('url'), metadata.get('original') or {}),
(media.get('source_mp4'), metadata.get('playable') or {})) if url]
- self._sort_formats(formats)
-
author = json_data.get('account') or {}
entries.append({
'id': f'{post_id}-{idx}',
media_id, headers=headers)
formats = self._extract_m3u8_formats(
media['mediaUrls']['bcHLS'], media_id, 'mp4')
- self._sort_formats(formats)
subtitles = {}
text_tracks = media.get('textTracks', {})
formats.extend(self._extract_mpd_formats(
mpd_url, page_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
-
return {
'id': data_video.get('guid') or page_id,
'display_id': page_id,
'view_count': view_count,
'average_rating': average_rating,
})
- self._sort_formats(entry['formats'])
return entry
duration = int_or_none(v)
self._clean_formats(formats)
- self._sort_formats(formats)
return {
'id': video_id,
'vcodec': 'none' if m.group('type') == 'audio' else None
}]
info_dict['direct'] = True
- self._sort_formats(formats)
info_dict.update({
'formats': formats,
'subtitles': subtitles,
if first_bytes.startswith(b'#EXTM3U'):
self.report_detected('M3U playlist')
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
- self._sort_formats(info_dict['formats'])
return info_dict
# Maybe it's a direct link to a video?
elif doc.tag == 'SmoothStreamingMedia':
info_dict['formats'], info_dict['subtitles'] = self._parse_ism_formats_and_subtitles(doc, url)
self.report_detected('ISM manifest')
- self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id)
self.report_detected('SMIL file')
- self._sort_formats(smil['formats'])
return smil
elif doc.tag == '{http://xspf.org/ns/0/}playlist':
self.report_detected('XSPF playlist')
mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
self.report_detected('DASH manifest')
- self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
self.report_detected('F4M manifest')
- self._sort_formats(info_dict['formats'])
return info_dict
except xml.etree.ElementTree.ParseError:
pass
})
if formats or subtitles:
self.report_detected('video.js embed')
- self._sort_formats(formats)
return [{'formats': formats, 'subtitles': subtitles}]
# Looking for http://schema.org/VideoObject
if not formats[-1].get('height'):
formats[-1]['quality'] = 1
- self._sort_formats(formats)
-
return [{
'id': flashvars['video_id'],
'display_id': display_id,
else:
entry_info_dict['url'] = video_url
- if entry_info_dict.get('formats'):
- self._sort_formats(entry_info_dict['formats'])
-
entries.append(entry_info_dict)
if len(entries) > 1:
make_archive_id('generic', f'{video_id}-{num}' if len(entries) > 1 else video_id),
],
})
- self._sort_formats(entry['formats'])
yield entry
'height': int_or_none(post_data.get('vid_hgt')),
})
- self._sort_formats(formats)
-
return {
'id': post_id,
'title': title,
'url': urljoin(self._MEDIA_BASE_URL, thumbnail),
} for thumbnail in try_get(video_info, lambda x: x['postData']['imgs'], list) or []]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': try_get(video_info, lambda x: x['postData']['ttl'], str),
'filesize': filesize,
'quality': quality(format_id),
})
- self._sort_formats(formats)
return {
'id': video_id,
if youtube_id:
return self.url_result(youtube_id, 'Youtube')
- self._sort_formats(formats)
-
return {
'id': video_id,
'display_id': display_id,
'format_id': '%s-%s' % (fmt['quality'], fmt['type'].split('/')[-1]),
'quality': quality(fmt['quality']),
})
- self._sort_formats(formats)
title = self._html_search_meta(
'title', webpage, 'title', fatal=True)
fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
formats.extend(fmts)
- self._sort_formats(formats)
for resource in video['resources']:
if resource.get('type') == 'subtitle':
if video.get('language'):
for fmt in formats:
fmt['language'] = video['language']
- self._sort_formats(formats)
images = (video.get('images') or []) + [video.get('image') or {}]
thumbnails = [{
'height': height,
})
formats.append(f)
- self._sort_formats(formats)
for cc in video_data.get('closedcaption', {}).get('src', []):
cc_url = cc.get('value')
'filesize': self._int(e.findtext('filesize'), 'filesize'),
'ext': determine_ext(e.findtext('./filename')),
})
- self._sort_formats(formats)
info['formats'] = formats
thumbnails = []
else:
self.raise_no_formats('User is offline', expected=True, video_id=channel_name)
- self._sort_formats(formats)
return {
'id': player_id,
'formats': formats,
if not formats and reason:
self.raise_no_formats(reason, expected=True)
- self._sort_formats(formats)
-
hl = get_value('hl')
subtitles_id = None
ttsurl = get_value('ttsurl')
formats, subs = self._extract_m3u8_formats_and_subtitles(
api['video']['S'], video_id, ext='mp4', m3u8_id='HLS')
- self._sort_formats(formats)
info_dict.update({
'id': video_id,
'height': int_or_none(fmt.get('height')),
})
- self._sort_formats(formats)
-
title = str_or_none(
try_get(metadata, lambda x: x['collection']['title'])
or self._html_search_meta(['og:title', 'twitter:title'], webpage)
'url': data_json['vtt_url'],
'ext': 'vtt',
})
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title'),
'width': format_info.get('width'),
'height': format_info.get('height'),
})
- self._sort_formats(formats)
thumbnails = []
card_sizes = xpath_element(video_data, 'titleCardSizes')
'acodec': ext,
'quality': 2, # Usually better quality
})
- self._sort_formats(formats)
return {
'id': track_id,
'format_id': '%s_%s' % (ext, label),
'height': height,
})
- self._sort_formats(formats)
return {
'id': video_id,
title = remove_end(self._html_extract_title(webpage), ' - Hell Porno')
info = self._parse_html5_media_entries(url, webpage, display_id)[0]
- self._sort_formats(info['formats'])
video_id = self._search_regex(
(r'chs_object\s*=\s*["\'](\d+)',
'url': s['file'],
'ext': 'mp4',
} for s in params['sources']]
- self._sort_formats(formats)
return {
'id': video_id,
f['language'] = audio
f['format_note'] = f'{version}, {extra}'
formats.extend(frmt)
- self._sort_formats(formats)
return {
'id': video_id,
'tbr': bitrate,
'format_note': label,
})
- self._sort_formats(formats)
metadata = self._extract_metadata(
'https://www.smashcast.tv/api/media/video', video_id)
'page_url': url,
'player_url': 'http://www.hitbox.tv/static/player/flowplayer/flowplayer.commercial-3.2.16.swf',
})
- self._sort_formats(formats)
metadata = self._extract_metadata(
'https://www.smashcast.tv/api/media/live', video_id)
'width': w,
'height': h,
})
- self._sort_formats(formats)
subtitles = {}
tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
if not formats and geo_restricted:
self.raise_geo_restricted(countries=['IN'], metadata_available=True)
- self._sort_formats(formats)
for f in formats:
f.setdefault('http_headers', {}).update(headers)
'vbr': vbr,
})
- self._sort_formats(formats)
-
return {
'id': '%s' % video_id,
'display_id': display_id,
stream_format['tbr'] = int_or_none(quality_information.group(4))
stream_formats.append(stream_format)
-
- self._sort_formats(stream_formats)
return stream_formats
def _real_extract(self, url):
formats = self._extract_m3u8_formats(
m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
- self._sort_formats(formats)
description = clean_html(title_info.get('summary_long'))
age_limit = parse_age_limit(video.get('parental_control', {}).get('rating'))
fmts, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, ext='mp4')
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
return formats, subtitles
'vcodec': 'none' if key.startswith('audio/') else None,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_title,
})
formats = self._extract_m3u8_formats(video_json['stream_url'], video_id, ext='mp4', m3u8_id='hls')
- self._sort_formats(formats)
json_ld = self._search_json_ld(
self._download_webpage(url, video_id, fatal=False) or '', video_id, fatal=False)
**self._RESOLUTION.get(si.get('sDisplayName'), {}),
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail'))
}]
- self._sort_formats(formats)
return merge_dicts({
'id': video_id,
'title': None,
'acodec': 'aac',
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': uploader or video_id,
'http_headers': {'Referer': url},
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': uploader or video_id,
'url': mezzanine_url,
})
- self._sort_formats(formats)
-
thumbnails = []
for thumbnail in (video.get('thumbnails') or []):
thumbnail_url = thumbnail.get('url')
'ext': ext,
'quality': quality(format_id),
})
- self._sort_formats(formats)
return {
'id': video_id,
formats.extend(self._extract_mpd_formats(
media_url, media_id, mpd_id='dash', fatal=False,
headers=self._MANIFEST_HEADERS))
- self._sort_formats(formats)
subtitles = {}
for subtitle in video_data.get('subtitles', []):
},
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
'url': video_url,
'height': height,
})
- self._sort_formats(formats)
timestamp = video.get('date')
if timestamp:
+ self._extract_http_video(webpage)
+ self._extract_http_audio(webpage, video_id))
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_title,
} for format in videos_list or []]
if dash_manifest_raw:
formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, media_id), mpd_id='dash'))
- self._sort_formats(formats)
thumbnails = [{
'url': thumbnail.get('url'),
dash = traverse_obj(media, ('dash_info', 'video_dash_manifest'))
if dash:
formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash'))
- self._sort_formats(formats)
comment_data = traverse_obj(media, ('edge_media_to_parent_comment', 'edges'))
comments = [{
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
formats.extend(self._extract_mpd_formats(
video_base + 'mpd', display_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
timestamp = unified_timestamp(self._html_search_meta(
'article:published_time', webpage, 'timestamp'))
replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
formats.extend(self._extract_ism_formats(
replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
elif manifest_type == 'DASH' or ext == 'mpd':
formats += self._extract_mpd_formats(
manifest_url, video_id, mpd_id='dash', fatal=False)
- self._sort_formats(formats)
final_result = self._search_json_ld(webpage, video_id, default={})
final_result.update({
if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
self._sleep(5, video_id)
- self._sort_formats(formats)
title = (get_element_by_id('widget-videotitle', webpage)
or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
})
formats.extend(extracted_formats)
- self._sort_formats(formats)
-
for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict, default=[]):
lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name'))
subtitles.setdefault(lang, []).extend([{
traverse_obj(show_stream, ('response', 'tokenization', 'url')), video_id,
headers=headers)
formats, subs = self._extract_m3u8_formats_and_subtitles(traverse_obj(streams, ('Streams', 'Adaptive')), video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
formats.append({
'url': href,
})
- self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, default={})
if not info:
json_ld = self._parse_json(self._search_regex(
'quality': quality(content_format),
'filesize': int_or_none(f.get('size_in_bytes')),
})
- self._sort_formats(formats)
compilation = result.get('compilation')
episode = title if compilation else None
'ext': 'flv',
'quality': quality(format_id),
} for format_id in self._QUALITIES]
- self._sort_formats(formats)
return {
'id': server_id,
'quality': 1 if format_id == 'Source' else 0,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video']
formats = list(self._media_selector(json_data.get('videoResource')))
- self._sort_formats(formats)
return {
'id': video_id,
'title': json_data.get('title'),
'ext': ext,
'height': height,
})
- self._sort_formats(formats)
description = self._og_search_description(webpage, default=None)
thumbnail = video.get('posterURL') or self._proto_relative_url(
webpage = self._download_webpage(url, video_id)
formats = self._extract_m3u8_formats(
self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
('ogg1', 'ogg', 'ogg'),
('flac', 'flac', 'flac'),
))]
- self._sort_formats(formats)
urls = []
thumbnails = []
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
f'https://hlslive.shugiintv.go.jp/{room_id}/amlst:{room_id}/playlist.m3u8',
room_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': room_id,
m3u8_url = re.sub(r'^http://', 'https://', m3u8_url)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, ext='mp4')
- self._sort_formats(formats)
title = self._html_search_regex(
(r'<td\s+align="left">(.+)\s*\(\d+分\)',
'm3u8 url', group=2)
formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
'ext': 'mp4',
})
- self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
r'(\d+)[pP]', format_id or path, 'height',
default=None)),
})
- self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
'filesize': int_or_none(fmt.get('filesize')),
'tbr': int_or_none(fmt.get('kbps')),
})
- self._sort_formats(formats)
thumbs = []
for thumb in clip.get('clipChapterThumbnailList') or []:
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
- self._sort_formats(formats)
-
if captions:
for caption in captions.get('objects', []):
# Continue if caption is not ready
from ..compat import compat_urllib_parse_unquote
from ..utils import (
determine_ext,
- ExtractorError,
format_field,
int_or_none,
str_to_int,
self.raise_no_formats(
'Video %s is no longer available' % video_id, expected=True)
- try:
- self._sort_formats(formats)
- except ExtractorError:
- if fatal:
- raise
-
if not title:
title = self._html_search_regex(
r'<h1[^>]*>([^<]+)', webpage, 'title')
subtitles.setdefault('en', []).append({
'url': track['file'],
})
- self._sort_formats(formats)
yield {
'id': video_id,
'title': item['title'],
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
thumbnail = None
poster = data.get('poster') or {}
'url': fallback_rendition_url,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
formats = self._extract_m3u8_formats(
data['playlistEntity']['uri'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
description = dict_get(
film, ('descriptscription', 'description',
'url': fallback_url,
})
- self._sort_formats(formats)
-
title = player_config.get('title') or media['title']
description = player_config.get('mediaInfo', {}).get('description')
thumbnail = media.get('image')
if not formats:
self.raise_no_formats('No video/audio found at the provided url.', expected=True)
- self._sort_formats(formats)
return {
'id': id,
'title': clean_html(item_json.get('title')),
'width': int_or_none(quality.attrib.get('width')),
'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
})
- self._sort_formats(formats)
return {
'id': video_id,
lrc_content = None
formats = self._get_formats(song_id)
- self._sort_formats(formats)
album_id = self._html_search_regex(
r'<a[^>]+href="http://www\.kuwo\.cn/album/(\d+)/"',
'format_id': 'mv',
})
- self._sort_formats(formats)
-
return {
'id': song_id,
'title': song_name,
if http_f:
formats.append(http_f)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._og_search_title(webpage, default=None),
'format_id': ext,
'ext': ext,
}]
- self._sort_formats(formats)
title = self._html_search_regex(
(r'<div class="title">(?P<title>.+?)</',
formats = self._extract_akamai_formats(
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
video_id)
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
if determine_ext(final_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
final_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', live=is_live, headers=headers)
- self._sort_formats(info['formats'])
else:
info['url'] = streaming_url
return {
'url': url,
})
- self._sort_formats(formats)
-
creator = self._html_search_regex(
r'<div[^>]+id="description">([^<]+)</div>', webpage, 'creator', fatal=False)
duration = parse_duration(self._html_search_regex(
'height': int(mobj.group(1)),
})
formats.append(f)
- self._sort_formats(formats)
subtitles = {}
automatic_captions = {}
f['height'] = int_or_none(format_id[:-1])
formats.append(f)
- self._sort_formats(formats, ('res', 'quality'))
publish_time = parse_iso8601(self._html_search_regex(
r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
'thumbnail': playurl['pic'],
'description': description,
'timestamp': publish_time,
+ '_format_sort_fields': ('res', 'quality'),
}
media_id = uu + '_' + vu
formats = self._get_formats('flash', uu, vu, media_id) + self._get_formats('html5', uu, vu, media_id)
- self._sort_formats(formats)
return {
'id': media_id,
'width': quality[2],
}),
formats.append(f)
- self._sort_formats(formats)
subtitles = {}
sub_file_id = video.get('SubFileId')
'filesize_approx': parse_filesize(m.group('size')),
})
- self._sort_formats(formats)
-
duration = float_or_none(data.get('duration'))
view_count = int_or_none(data.get('viewCount'))
else:
extract_original(video_url)
- self._sort_formats(formats)
-
thumbnail = thumbnail or self._search_regex(
r'"image"\s*:\s*"([^"]+)', webpage, 'thumbnail', default=None)
'width': info.get('video_width'),
'quality': 1,
}]
- self._sort_formats(formats)
return {
'id': video_id,
'title': info.get('msgText'),
'ext': ext,
})
- self._sort_formats(formats)
-
subtitles = {}
for flag in mobile_item.get('flags'):
if flag == 'ClosedCaptions':
archive_status = item.get('archiveStatus')
if archive_status != 'ARCHIVED':
self.raise_no_formats('this video has been ' + archive_status.lower(), expected=True)
- self._sort_formats(formats)
info['formats'] = formats
return info
'tbr': float_or_none(source.get('data-bitrate'), scale=1000),
} for source in sources]
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
streaming_url, video_slug, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
- # It seems like this would be correctly handled by default
- # However, unless someone can confirm this, the old
- # behaviour is being kept as-is
- self._sort_formats(formats, ('res', 'source_preference'))
subtitles = {}
duration = int_or_none(video_data.get('durationInSeconds'))
transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list)
'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
'duration': duration,
'subtitles': subtitles,
+ # It seems like this would be correctly handled by default
+ # However, unless someone can confirm this, the old
+ # behaviour is being kept as-is
+ '_format_sort_fields': ('res', 'source_preference')
}
formats = self._extract_m3u8_formats(
m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
- self._sort_formats(formats)
info = {
'id': item_id,
'formats': formats,
if f4m_url:
formats.extend(self._extract_f4m_formats(
f4m_url, video_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
comments = [{
'author_id': comment.get('author_id'),
'url': rtsp_url,
'format_id': 'rtsp',
})
- self._sort_formats(formats)
return {
'id': broadcast_id,
'format_id': 'rtsp',
})
- self._sort_formats(formats)
return formats
def _extract_folder(self, url, folder_id):
formats = self._extract_m3u8_formats(
self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
return {
'id': video_id,
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
return {
'id': id,
'title': video_json.get('title'),
fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4', m3u8_id='hls', live=True)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
stream_title = self._extract_js_var(webpage, 'video_title', 'LRT')
return {
'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id,
'height': int_or_none(format_id),
})
- self._sort_formats(formats)
conviva = self._download_json(
'https://www.lynda.com/ajax/player/conviva', video_id,
} for format_id, video_url in prioritized_stream.items()])
self._check_formats(formats, video_id)
- self._sort_formats(formats)
subtitles = self.extract_subtitles(video_id)
'height': height,
'http_headers': headers,
})
- self._sort_formats(formats)
meta_data = video_data['meta']
title = remove_end(meta_data['title'], '.mp4')
subtitles = self._merge_subtitles(m3u8_subs, mpd_subs)
formats.extend(m3u8_formats + mpd_formats)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
formats = self._extract_m3u8_formats(
video['VideoSource'], video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
subtitles = {}
for s in (video.get('Subtitles') or {}):
], webpage, 'format url')
formats = self._extract_wowza_formats(
format_url, page_id, m3u8_entry_protocol, ['smil'])
- self._sort_formats(formats)
return {
'id': page_id,
episode_json = self._download_json(_API_URL.format('showmodule', 'episodedetails', video_id), video_id)
details = episode_json.get('details', {})
formats = self._extract_m3u8_formats(details.get('videoM3u8Url'), video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
'series': details.get('showTitle'),
details = json.get('details', {})
video_url = details.get('liveUrl')
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True)
- self._sort_formats(formats)
return {
'id': video_id,
'title': 'Manoto TV Live',
if 'transcoded' in f['format_id']:
f['preference'] = f.get('preference', -1) - 1
- self._sort_formats(formats)
-
def get_likes():
likes = self._search_regex(
r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ),
'vcodec': 'none' if format_id.startswith('Audio') else None,
})
- self._sort_formats(formats)
-
return {
'id': episode,
'title': title,
f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en',
video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4')
- self._sort_formats(formats)
thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()]
})['data']['videoUrl']
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
formats = self._extract_f4m_formats(f4m_url, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
'title': 'Матч ТВ - Прямой эфир',
formats.append(f)
- self._sort_formats(formats)
-
description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
timestamp = parse_iso8601(
xpath_text(
'An unknown error occurred ({0}).'.format(error),
video_id=video_id)
- self._sort_formats(formats)
-
# Necessary because the id of the author is not known in advance.
# Won't raise an issue if no profile can be found as this is optional.
author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
formats = self._extract_wowza_formats(
playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
- self._sort_formats(formats)
return {
'id': video_id,
'ext': ext,
'url': src,
})
- self._sort_formats(formats)
return {
'id': production_id,
if (first_e or geo_e) and not formats:
raise geo_e or first_e
- self._sort_formats(formats)
-
feed_data = self._download_json(
'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2/guid/-/' + guid,
guid, fatal=False)
})
formats.extend(stream_formats)
- self._sort_formats(formats)
-
# XXX: Presentation['Presenters']
# XXX: Presentation['Transcript']
audio_format.setdefault('acodec', 'aac')
formats.append(audio_format)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': asset.get('title'),
formats, subs = [{'url': source}], {}
if player_attrs.get('subs'):
self._merge_subtitles({'und': [{'url': player_attrs['subs']}]}, target=subs)
- self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
formats = self._extract_m3u8_formats(
stream_info['encUrl'], video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
artist_list = play_info.get('artistList')
artist = None
'url': video_url,
'ext': video_ext,
}]
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': rate_str,
'tbr': int(rate_str),
})
- self._sort_formats(formats)
description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
webpage, 'description', flags=re.DOTALL)
'ext': fmt['format'],
})
- self._sort_formats(formats)
return {
'id': video_id,
},
'format_note': stream.get('name'),
})
- self._sort_formats(formats)
return {
'id': video_id,
'height': source.get('heightPixels'),
'width': source.get('widthPixels'),
})
- self._sort_formats(formats)
subtitles = {
lang: [{
playlist['playbackUrl'], video_id, ism_id='mss',
fatal=False, headers=headers))
formats = [merge_dicts(f, {'language': language}) for f in formats]
- self._sort_formats(formats)
return {
'id': video_id,
'acodec': acodec,
'vcodec': vcodec,
})
- self._sort_formats(formats)
subtitles = {}
for source in settings.findall('.//MarkerResourceSource'):
for fmt in formats:
fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
- self._sort_formats(formats)
-
return {
'id': result_video_id,
'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
'ext': 'mp4'
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
'height': int_or_none(source.get('size')),
'url': src,
})
- self._sort_formats(formats)
entity = video.get('entity') or entity
owner = entity.get('ownerObj') or {}
'height': int_or_none(fmt.get('height')),
**parse_codecs(fmt.get('codecs')),
})
- self._sort_formats(formats)
return {
'id': video_id,
hls_url, video_id,
ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls', live=is_live)
- self._sort_formats(formats)
return {
'id': video_id,
if not formats and cloudcast.get('isExclusive'):
self.raise_login_required(metadata_available=True)
- self._sort_formats(formats)
-
comments = []
for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
node = edge.get('node') or {}
'width': int(mobj.group(1)),
})
formats.append(f)
- self._sort_formats(formats)
thumbnails = []
for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
formats.extend(f)
self._merge_subtitles(s, target=subtitles)
- self._sort_formats(formats)
return {
'id': video_id,
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
m3u8_url += '?' + token
formats = self._extract_wowza_formats(
m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp', 'f4m'])
- self._sort_formats(formats)
description = info.get('ment')
duration = parse_duration(info.get('time'))
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
- self._sort_formats(formats)
-
return {
'id': video_id,
'display_id': json_data.get('slug') or video_slug,
'ext': 'mp4',
}]
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'),
'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
'quality': 1 if format_id == '1001' else None,
})
- self._sort_formats(formats)
subtitles = {}
for file_ in video.get('files', []):
}])
except (KeyError, TypeError):
raise ExtractorError('Invalid rendition field.')
- if formats:
- self._sort_formats(formats)
return formats
def _extract_subtitles(self, mdoc, mtvn_id):
if not formats:
return None
- self._sort_formats(formats)
-
return {
'title': title,
'formats': formats,
'format_id': format_id,
'preference': -100 if '.smil' in s['file'] else 0, # Strictly inferior than all other formats?
})
- self._sort_formats(formats)
return {
'id': video_id,
continue
formats.extend(
self._extract_f4m_formats(f4m_url + '&hdcore=3.0.3', video_id, f4m_id=stream_name))
- self._sort_formats(formats)
return {
'id': video_id,
else:
raise ExtractorError(
'Found song but don\'t know how to download it')
- self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage),
video.get('streamUrl'), video.get('hlsStreamUrl'),
video.get('mp4StreamUrl'), int_or_none(video.get('width')),
int_or_none(video.get('height')))
- self._sort_formats(formats)
return {
'id': video_id,
'title': video['title'],
formats = self._extract_m3u8_formats(
url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_id,
'height': self._QUALITY.get(f_url[-2:]),
'quality': int_or_none(f_url[-2:]),
} for f_url in video_data.get('smcUriList') or []]
- self._sort_formats(formats)
return {
'id': id,
'title': video_data.get('clipTitle'),
formats.extend(self._extract_m3u8_formats(
update_url_query(stream_url, query), video_id,
'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
- self._sort_formats(formats)
replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
quality.get('url'), video_id, 'mp4',
m3u8_id=quality.get('qualityId'), live=True
))
- self._sort_formats(formats)
return {
'id': video_id,
formats.extend(cvp_info['formats'])
info = merge_dicts(info, cvp_info)
- self._sort_formats(formats)
info['formats'] = formats
return info
subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
info = merge_dicts(info, cvp_info)
- self._sort_formats(formats)
else:
info.update(self._embed_url_result(team, video['videoId']))
'resourceId': base64.b64encode(resource.encode()).decode(),
}).encode())['tokenizedUrl']
formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'tbr': tbr,
'ext': 'mp4',
})
- self._sort_formats(formats)
subtitles = {}
closed_captioning = video_data.get('closedCaptioning')
# -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to
# download with ffmpeg without this option
f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']}
- self._sort_formats(formats)
return {
'id': pid,
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4', headers=headers, m3u8_id='hls',
fatal=live, live=live, errnote='No HLS formats found'))
- self._sort_formats(formats)
return {
'id': str_or_none(video_id),
ff['vcodec'] = 'none'
ff['ext'] = ext or 'mp3'
formats.append(ff)
- self._sort_formats(formats)
config = playlist['config']
song_id, 'Downloading song info')['songs'][0]
formats = self.extract_formats(info)
- self._sort_formats(formats)
lyrics_info = self.query_api(
'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
{'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
for brs, mv_url in info['brs'].items()
]
- self._sort_formats(formats)
return {
'id': mv_id,
if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
formats = self.extract_formats(info['mainSong'])
- self._sort_formats(formats)
return {
'id': info['mainSong']['id'],
'ext': 'mp4',
'url': tpl.replace('{}', film_fn) + suffix[key],
} for key, tpl in templates.items()]
- self._sort_formats(formats)
return {
'id': video_id,
if video_type_description == 'Audio File':
formats[0]['vcodec'] = 'none'
self._check_formats(formats, media_id)
- self._sort_formats(formats)
return {
'id': media_id,
if not entries:
raise ExtractorError('No HTML5 media elements found')
info = entries[0]
- self._sort_formats(info['formats'])
title = self._html_search_meta('og:title', webpage, fatal=False)
description = self._html_search_meta(
formats.append(f)
self._check_formats(formats, video_guid)
- self._sort_formats(formats)
return {
'id': video_guid,
fmts, subs = self._extract_m3u8_formats_and_subtitles(data_json['stream'], display_id)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
- self._sort_formats(formats)
return merge_dicts(ld_json, {
'id': data_json['id'],
'display_id': display_id,
else:
self.raise_no_formats(f'{cdn} formats are currently not supported', video_id)
- self._sort_formats(formats)
-
subtitles = {}
for sub in video.get('captiondata') or []:
if sub.get('data'):
player, 'source', default=None, fatal=True)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(source, video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
video_id).get('video_url')
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=isLive)
- self._sort_formats(formats, ['res', 'tbr'])
return {
'id': video_id,
'uploader_url': uploaderPage,
'location': location,
'upload_date': upload_date,
- 'is_live': isLive
+ 'is_live': isLive,
+ '_format_sort_fields': ('res', 'tbr'),
}
ext = determine_ext(item_url)
if ext == 'm3u8':
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
- self._sort_formats(info['formats'])
else:
info['url'] = item_url
if item.get('audio') is True:
m3u8_id='hls', fatal=False)
for f in info['formats']:
f['language'] = lang
- self._sort_formats(info['formats'])
else:
info.update({
'_type': 'url_transparent',
formats = self._extract_m3u8_formats(
f'https://nhks-vh.akamaihd.net/i/das/{video_id[0:8]}/{video_id}_V_000.f4v/master.m3u8',
video_id, ext='mp4', m3u8_id='hls')
- self._sort_formats(formats)
duration = parse_duration(base_values.get('r_duration'))
'height': height,
'tbr': int_or_none(self._search_regex(r'_(\d+)[kK]', playback_url, 'bitrate', default=None)),
})
- self._sort_formats(formats)
thumbnails = []
cuts = video_data.get('image', {}).get('cuts') or []
if fmt:
formats.append(fmt)
- self._sort_formats(formats)
-
# Start extracting information
tags = None
if webpage:
formats.extend(self._extract_mpd_formats(
manifest_base_url + 'mpd', content_id,
mpd_id='dash', fatal=False))
- self._sort_formats(formats)
thumbnails = []
for image in (content.get('Images') or []):
'format_id': image_id,
})
formats.append(common)
- self._sort_formats(formats)
section = traverse_obj(post, ('postSection', 'name'))
formats += self._extract_m3u8_formats(
player_url, video_id, 'mp4', 'm3u8_native', m3u8_id=kind, fatal=False, quality=int(kind == 'high'))
- self._sort_formats(formats)
-
tag_block = get_element_by_class('tag-block', webpage)
tags = re.findall(
r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
formats.append({
'url': source_src,
})
- self._sort_formats(formats)
return {
'id': video_id,
'ext': source.get('type'),
} for source in playlist_info.get('sources')]
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
if not formats and has_drm:
self.report_drm(video_id)
- self._sort_formats(formats)
title = self._og_search_title(
webpage, default=None) or self._search_regex(
formats = [{
'url': video_url,
}]
- self._sort_formats(formats)
title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
thumbnail = config.get('poster')
'Authorization': f'Bearer {self._access_token}'
})[0]['links']['play']['href']
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
if not self.get_param('allow_unplayable_formats') and drm:
self.report_drm(video_id)
- self._sort_formats(formats)
-
info = {
'id': video_id,
'title': video_id,
'quality': stream.get('kwaliteit'),
})
- self._sort_formats(formats)
-
subtitles = {}
if metadata.get('tt888') == 'ja':
subtitles['nl'] = [{
m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
- self._sort_formats(formats)
-
entries.append({
'id': media_id,
'title': media.get('title', {}).get('$text') or playlist_title,
'format_id': asset_format,
'vcodec': 'none',
})
- self._sort_formats(formats)
data = call_playback_api('metadata')
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
quality=1, m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
'url': file_,
'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
})
- self._sort_formats(formats)
return {
'id': xpath_text(video, './id'),
} for quality, source in video_data.get('files').items() if source]
self._check_formats(formats, video_id)
- self._sort_formats(formats)
duration = parse_duration(traverse_obj(video_data, 'duration', 'duration_format'))
thumbnails = [
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
'ext': ext,
})
- self._sort_formats(formats)
thumbnails = []
for image in video_data.get('images', []):
if payment_info:
self.raise_no_formats('This video is paid, subscribe to download it', expected=True)
- self._sort_formats(formats)
-
info['formats'] = formats
return info
m3u8_url = self._download_json(
f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': uuid,
'vcodec': 'none',
'acodec': 'wav'
})
- self._sort_formats(formats)
return {
'id': event_id,
data_json = self._search_json_ld(webpage, id)
m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id)
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title'),
'vbr': float_or_none(f.get('video_bitrate')),
})
formats.append(http_f)
- self._sort_formats(formats)
meta = video.get('meta', {})
if not formats and not auth_data.get('authorized'):
self.raise_no_formats('%s said: %s' % (
self.IE_NAME, auth_data['message']), expected=True)
- self._sort_formats(formats)
subtitles = {}
for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
})
formats.append(track_obj)
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
formats = list(self._expand_media(video_id, new_media))
is_live = False
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': get_first(movie_stores, 'title'),
formats = self._extract_m3u8_formats(
capture_data.get('source'), video_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': q,
'quality': preference(q),
})
- self._sort_formats(formats)
else:
return self.url_result(self._search_regex(
r'"youtube_id"\s*:\s*"([^"]+)', webpage, 'youtube id'), 'Youtube')
HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking',
errnote=f'This video seems to be blocked outside of {geo_str}. You may want to try the streaming-* formats')
- self._sort_formats(formats)
-
subtitles = {}
for sub in sd.get('subtitles', []):
sub_src = sub.get('src')
format_url, video_id, 'mp4', m3u8_id=format_id))
else:
continue
- self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
description = self._og_search_description(webpage)
format_url, video_id, 'mp4', m3u8_id=format_id))
else:
continue
- self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
if idx >= 1:
'url': format_url,
'height': int(height),
})
- self._sort_formats(formats)
return {
'id': video_id,
subtitles = self._merge_subtitles(
podcast_subtitles, streams_subtitles, self.extract_subtitles(base_url, video_id, delivery))
- self._sort_formats(formats)
self.mark_watched(base_url, video_id, delivery_info)
return {
elif stream.get('streamType') != 'VOD':
self.raise_no_formats('Unknown type of stream was detected: "%s"' % str(stream.get('streamType')))
formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
media_info = self._download_webpage(
self._MEDIA_INFO_URL % video_id, video_id, note='Downloading media info', fatal=False)
}
elif name == 'video':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
- self._sort_formats(formats)
return {
**info,
'formats': formats,
for f in formats:
if (f.get('format_note') or '').endswith(' AD'): # Audio description
f['language_preference'] = -10
- self._sort_formats(formats)
rating_str = info.get('rating')
if rating_str is not None:
'format_id': k,
'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v
} for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v]
- self._sort_formats(formats)
title = self._search_regex(
(r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
} for name, url in srcs.items() if len(name) > 8 and name.startswith('data-src')]
if not formats:
formats = [{'url': url} for url in srcs.values()]
- self._sort_formats(formats)
info = self._search_json_ld(webpage, video_id, expected_type='VideoObject')
info.update({
else:
f['fps'] = int_or_none(file_.get('fps'))
formats.append(f)
- self._sort_formats(formats)
description = video.get('description')
if description and len(description) >= 250:
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '),
'title': segment.get('name')
} for segment in traverse_obj(metadata, ('segments', 'segment_list'))]
- self._sort_formats(formats)
return {
'id': video_id,
'title': ride_data.get('title'),
'vbr': int_or_none(c.get('videoRate'), 1000),
'abr': int_or_none(c.get('audioRate'), 1000),
})
- self._sort_formats(formats)
return {
'id': video_id,
}
self._add_width_and_height(rtmp_format)
formats.append(rtmp_format)
- self._sort_formats(formats)
info['formats'] = formats
return info
m3u8_id='hls', fatal=False))
if not formats and not self.get_param('ignore_no_formats'):
return
- self._sort_formats(formats)
return {
'title': title,
'formats': formats,
formats.append({
'url': source_url,
})
- self._sort_formats(formats)
mature = metadata.get('adult')
if mature is None:
formats = self._extract_m3u8_formats(
vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
transform_source=transform_source, fatal=False))
- self._sort_formats(formats, ('tbr', )) # Incomplete resolution information
-
subtitles = {}
for caption in video_data.get('captions', []):
caption_url = caption.get('url')
'timestamp': parse_iso8601(video_data.get('dateadd')),
'formats': formats,
'subtitles': subtitles,
+ '_format_sort_fields': ('tbr', ), # Incomplete resolution information
}
'format_id': format_id,
'height': height,
})
- self._sort_formats(formats)
title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
description = self._html_search_regex(
'height': int_or_none(format_dict.get('height')),
'duration': duration,
})
- self._sort_formats(formats)
description = data.get('description') or data.get('description_html') or data.get('seo_description')
timestamp = unified_timestamp(data.get('created_at'))
formats = self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
if error:
fail(error)
- self._sort_formats(formats)
-
webpage = self._download_webpage(
'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
video_id)
asset_title = id.replace('-', ' ')
asset_id = f'{asset["sk"]}_{id}'.replace('#', '-')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id)
- self._sort_formats(formats)
entries.append({
'id': asset_id,
'title': asset_title,
format_url, lecture_id, mpd_id=format_id,
note='Downloading %s MPD manifest' % server_id,
fatal=False))
- self._sort_formats(formats)
content = str_or_none(desc.get('content'))
description = (clean_html(compat_b64decode(content).decode('utf-8'))
'width': int_or_none(file_info.get('width')),
'height': int_or_none(file_info.get('height')),
})
- self._sort_formats(formats)
thumbnails = []
for thumb in media.get('thumbs', []):
'format_id': 'http-' + format_id,
'height': int_or_none(height),
})
- self._sort_formats(formats)
info.update({
'id': video_id,
'quality': quality(fmt.get('quality')),
'preference': preference,
})
- self._sort_formats(formats)
title = item['title']
is_live = item['type'] == 'stream'
'height': height,
'url': val,
})
- self._sort_formats(formats)
# Extract title - should be in the flashvars; if not, look elsewhere
if video_title is None:
for a_format in formats:
if not dict_get(a_format, ['tbr', 'width', 'height']):
a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0
- self._sort_formats(formats)
return {
'id': video_id,
})
formats.append(clip_f)
- self._sort_formats(formats)
-
duration = int_or_none(
clip.get('duration')) or parse_duration(clip.get('formattedDuration'))
subtitles = self._merge_subtitles(subtitles, subs)
formats, subtitles = self._to_ad_free_formats(video_id, formats, subtitles)
- self._sort_formats(formats)
info = {
'id': video_id,
formats = list(self._extract_formats(
try_get(media, lambda x: x['playback']['mediaSources']), video_id))
- self._sort_formats(formats)
return {
'id': video_id,
'url': stream_url,
})
- self._sort_formats(formats)
-
return {
'id': compat_str(channel['id']),
'formats': formats,
thumbnail = None
duration = None
- self._sort_formats(formats)
-
view_count = str_to_int(self._search_regex(
(r'Views:\s*</span>\s*<span>\s*([\d,.]+)',
r'class=["\']views["\'][^>]*><p>([\d,.]+)'), webpage,
r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False)
mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&', '&')
formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
- self._sort_formats(formats)
return {
'age_limit': 18,
})
if formats:
info['formats'] = formats
- self._sort_formats(info['formats'])
description = self._html_search_regex(
(r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
continue
add_format(video_url)
- # field_preference is unnecessary here, but kept for code-similarity with youtube-dl
- self._sort_formats(
- formats, field_preference=('height', 'width', 'fps', 'format_id'))
-
model_profile = self._search_json(
r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
video_uploader = self._html_search_regex(
'height': item.get('height'),
'bitrate': item.get('bitrate'),
})
- self._sort_formats(formats)
webpage = self._download_webpage(url, video_id)
raise ExtractorError('No video on the provided url.', expected=True)
playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id)
- self._sort_formats(formats)
return {
'id': video_id,
'title': main_data['title'],
'tbr': tbr,
'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
})
- self._sort_formats(formats)
return {
'duration': float_or_none(video.get('duration')),
format_id += '-%sp' % quality
f['format_id'] = format_id
formats.append(f)
- self._sort_formats(formats)
creator = try_get(
show, lambda x: x['producer']['name'], compat_str)
'abr': details.get('abr'),
})
self._check_formats(formats, mid)
- self._sort_formats(formats)
actual_lrc_lyrics = ''.join(
line + '\n' for line in re.findall(
f_copy['protocol'] = 'http'
f = f_copy
formats.append(f)
- self._sort_formats(formats)
description = video.get('description')
thumbnail = video.get('thumb')
sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]}
formats.extend(subformats)
- self._sort_formats(formats)
return formats
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
- self._sort_formats(formats)
subtitles = {}
closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
'abr': stream['bitRate'],
'asr': stream['sampleRate']
} for stream in broadcast['streamUrls']]
- self._sort_formats(formats)
return {
'id': radio_id,
for i, fm in
enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str))
]
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': format_id,
})
formats.append(f)
- self._sort_formats(formats)
title = self._og_search_title(webpage)
thumbnail = self._og_search_thumbnail(webpage)
raise ExtractorError('Unable to extract video info, make sure the URL is valid')
formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
- self._sort_formats(formats)
data = video_info.get('structured_data', {})
video = media['video']
relinker_info = self._extract_relinker_info(video['content_url'], video_id)
- self._sort_formats(relinker_info['formats'])
thumbnails = []
for _, value in media.get('images', {}).items():
else:
raise ExtractorError('not a media file')
- self._sort_formats(relinker_info['formats'])
-
thumbnails = []
for image_type in ('image', 'image_medium', 'image_300'):
thumbnail_url = media.get(image_type)
relinker_info = self._extract_relinker_info(
urljoin(url, relinker_url), video_id)
- self._sort_formats(relinker_info['formats'])
title = self._search_regex(
r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1',
relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id)
- self._sort_formats(relinker_info['formats'])
-
return {
'id': video_id,
'title': track_info.get('title') or self._og_search_title(webpage),
'format_id': 'http-mp4',
'url': urls['mp4']
})
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
if 'akamaized' in f['url'] or 'cloudfront' in f['url']:
f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs
- self._sort_formats(formats)
-
return {
'id': video_meta.get('product_id') or video_json.get('product_id'),
'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')),
video_id = self._match_id(url)
formats, subtitles = self._get_formats_and_subtitles(video_id)
- self._sort_formats(formats)
video_info = self._download_json(
f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)
- self._sort_formats(formats, ['res', 'proto'])
-
return {
'id': video_id,
'formats': formats,
'thumbnail': traverse_obj(video_info, 'thumbnailUrl'),
'timestamp': traverse_obj(
video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp),
+ '_format_sort_fields': ('res', 'proto'),
}
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
- self._sort_formats(formats, ['res', 'proto'])
return {
'id': media_id,
'formats': formats,
'series': data.get('programLabel'),
'subtitles': subtitles,
'is_live': is_live,
+ '_format_sort_fields': ('res', 'proto'),
}
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token),
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
for resource in video.get('resources', []):
if resource.startswith('closed_caption_'):
hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False))
formats.extend(self._extract_mpd_formats(
dash_playlist_url, display_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
return {
**info,
'height': height,
'quality': quality(format_id),
})
- self._sort_formats(formats)
return {
'id': video_id,
video_url = self._html_search_regex(
r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
formats.append({'url': video_url, 'ext': 'mp4'})
- self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
formats.append({
'url': src,
})
- self._sort_formats(formats)
return {
'id': video_id,
'title': title,
formats = self._extract_smil_formats(
'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id,
video_id)
- self._sort_formats(formats)
return {
'id': video_id,
'ext': ext,
'container': container if method != 'mobile' else None,
})
- self._sort_formats(formats)
return {
'id': video_id,
'ext': 'flv',
})
formats.append(fmt)
- self._sort_formats(formats)
thumbnails = []
for content_asset in content_data.findall('.//contentAssets'):
if youtube_id:
return self.url_result(youtube_id, 'Youtube')
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
self.raise_no_formats(
f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
video_id=video_id, expected=True)
- self._sort_formats(formats)
uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
episode = self._download_json(
api_episode_url, display_id,
formats.extend(self._extract_f4m_formats(
hds_url, item_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
-
info_dict['formats'] = formats
return info_dict
if m3u8_url:
formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
- self._sort_formats(formats)
-
return {
'id': display_id,
'title': title,
raise ExtractorError('video not found', expected=True)
formats = self._extract_m3u8_formats(stream_url.decode(), video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
video_data = self._download_json(
self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
formats = self._extract_m3u8_formats(
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
- self._sort_formats(formats)
thumbnails = []
webpage = self._download_webpage(url, video_id)
formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
})
self._check_formats(formats, media_id)
- self._sort_formats(formats)
duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
if isinstance(duration, compat_str):
'quality': q(quality),
'url': video_url,
})
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
'quality': q(quality),
'url': audio_url,
})
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
formats.extend(self._extract_f4m_formats(
http_base_url + '/manifest.f4m',
video_id, f4m_id='hds', fatal=False))
- self._sort_formats(formats)
return {
'id': video_id,
formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}]
else:
formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id)
- self._sort_formats(formats)
return {
'id': video_id,
if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error':
raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True)
- self._sort_formats(formats)
return {
'id': v_id,
'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))),
thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
'height': 'h',
}, default={})
})
- self._sort_formats(formats)
subtitles = {
lang: [{
'url': format_url,
'format_id': format_id,
})
- self._sort_formats(formats)
return formats
def _download_and_extract_formats(self, video_id, query=None):
})
formats.append(fmt)
- self._sort_formats(formats, ('source', ))
-
return {
'id': video_id,
'title': title,
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
+ '_format_sort_fields': ('source', ),
}
if ns_st_cds != 'free':
raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
- self._sort_formats(formats)
-
themes = pv('themes')
return {
'height': 720,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
'format': 'mp4',
})
- self._sort_formats(formats)
return {
'id': video_id,
'title': traverse_obj(
if not formats:
self.raise_no_formats('There is no video.', expected=True, video_id=video_id)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_data.get('title'),
entry['format_id'] += mobj.group('tag')
formats.append(entry)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
formats = self._extract_m3u8_formats(
f'{stream_domain}/i/{filename}_1@{stream_num}/master.m3u8',
display_id, ext='mp4')
- self._sort_formats(formats)
title = self._html_search_regex(
(*self._og_regexes('title'), r'(?s)<title>([^<]*?)</title>'), webpage, 'video title')
'format_id': '%s-%d' % (determine_protocol(f), tbr),
'tbr': tbr,
})
- # 'tbr' was explicitly set to be preferred over 'height' originally,
- # So this is being kept unless someone can confirm this is unnecessary
- self._sort_formats(info_dict['formats'], ('tbr', 'res'))
thumbnails = []
if video.get('thumbnailUrl'):
'thumbnails': thumbnails,
'duration': float_or_none(video.get('SM_length')),
'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '),
+ # 'tbr' was explicitly set to be preferred over 'height' originally,
+ # so this is being kept unless someone can confirm it is unnecessary
+ '_format_sort_fields': ('tbr', 'res')
})
entries.append(info_dict)
'width': int_or_none(resource.get('width')),
'height': int_or_none(resource.get('height')),
})
- self._sort_formats(formats)
attrs = {}
for attribute in video['attributes']:
r'^(\d+)[pP]', source.get('label', ''), 'height',
default=None)),
} for source in sources if source.get('file')]
- self._sort_formats(formats)
title = self._html_search_regex(
r'<title>([^<]+)\s*-\s*Sexu\.Com</title>', webpage, 'title')
urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
return formats
def _real_extract(self, url):
# https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
r'aws\.manifestfilter=[\w:;,-]+&?',
'', playout['url']), video_id, 'mp4')
- self._sort_formats(formats)
# video = self._call_api(
# 'product/id', video_id, {
iv = [0] * 16
m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii')
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
- self._sort_formats(formats)
release_date = self._html_search_regex(
(r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'),
'format_note': stream.get('label'),
'quality': int_or_none(stream.get('quality', 100)),
})
- self._sort_formats(formats)
return {
'id': compat_str(room.get('live_id') or broadcaster_id),
'quality': preference(quality_id),
'ext': 'mp4',
})
- self._sort_formats(formats)
return {
'id': video_id,
'quality': quality_key(quality),
'ext': ext,
})
- self._sort_formats(formats)
def get(getter):
for src in (data, clip_data):
self.raise_geo_restricted(countries=['IT'])
formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
formats.extend(self._extract_mpd_formats(
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
mpd_id='dash', fatal=False))
- self._sort_formats(formats)
info.update({
'id': service_id,
'formats': formats,
'height': int_or_none(data.get('height')),
'fps': int_or_none(data.get('fps')),
})
- self._sort_formats(formats)
playlist.append({
'id': '%s_part%d' % (video_id, i + 1),
video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
for f in formats:
f.setdefault('http_headers', {}).update(headers)
- self._sort_formats(formats)
metadata = self._call_api(
'1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
if not formats and info.get('policy') == 'BLOCK':
self.raise_geo_restricted(metadata_available=True)
- self._sort_formats(formats)
user = info.get('user') or {}
thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
- self._sort_formats(m3u8_formats)
if not m3u8_formats:
duration = None
format_url = format_url[0]
extract_format(format_id, format_url)
- self._sort_formats(formats)
-
info = self._search_json_ld(webpage, video_id, default={})
title = self._html_search_regex(
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
view_count = str_to_int(video.get('viewed'))
'width': int(fmt.get('width')),
'height': int(fmt.get('height')),
} for fmt in metadata.findall('./PlaybackLinks/FileURL')]
- self._sort_formats(formats)
return {
'id': video_id,
formats.append({
'url': src,
})
- self._sort_formats(formats)
player = self._parse_json(
self._search_regex(
})
formats.append(m3u8_format)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
'url': podcast_url,
'quality': q(quality),
})
- self._sort_formats(formats)
if media_type == 'video':
for sub in (media_data.get('subtitleList') or []):
hls = self._html_search_regex(r'\bdata-hls\s*=\s*"([^"]+)"', player, 'HLS URL')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls, video_id, 'mp4')
- self._sort_formats(formats)
captions = self._html_search_regex(
r'\bdata-captions-url\s*=\s*"([^"]+)"', player, 'captions URL', fatal=False)
'format_id': ext + quality,
'url': video_url,
})
- self._sort_formats(formats)
entry['formats'] = formats
entries.append(entry)
embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
'https://steamcommunity.com/actions/ajaxresolveusers',
video_id, query={'steamids': video_id})[0]
- self._sort_formats(formats)
return {
'id': video_id,
'title': self._generic_title('', webpage),
'vcodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['video_codec_name'])).get('vcodec'),
'acodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['audio_codec_name'])).get('acodec'),
})
- self._sort_formats(formats)
return {
'id': video_id,
formats = self._extract_m3u8_formats(
f'https://stream.mux.com/{video_info["play_id"]}.m3u8?token={video_info["token"]}',
video_id, ext='mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
})
formats = list(self._extract_formats(spl_url, video))
- self._sort_formats(formats)
return {
'id': video_id,
if not formats:
self.raise_no_formats('No active streams found', expected=True)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': video_id,
else:
self.raise_no_formats(f'Page type "{post_type}" is not supported')
- self._sort_formats(formats)
return {
'id': str(webpage_info['post']['id']),
'formats': formats,
'format_id': video_ext,
'quality': quality(video_ext),
})
- self._sort_formats(formats)
return {
'id': video_id,
'vcodec': 'none',
'url': audio_url,
})
- self._sort_formats(formats)
return {
'id': audio_id,
self.raise_geo_restricted(
'This video is only available in Sweden',
countries=self._GEO_COUNTRIES, metadata_available=True)
- self._sort_formats(formats)
subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
if isinstance(subtitle_references, list):
'vcodec': codec if media_type == 'Video' else 'none',
'acodec': codec if media_type == 'Audio' else None,
})
- self._sort_formats(formats)
upload_date = None
entry_pdatet = attr.get('entry_pdatet')
timestamp = video_info.get('timestamp')
title = title or video_info.get('description')
- self._sort_formats(formats)
-
return {
'id': display_id,
'title': title,
'format_id': label,
'quality': quality(label),
})
- self._sort_formats(formats)
return {
'id': video_id,
} for media_url in set(media_urls)
]
- self._sort_formats(formats)
-
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_meta(
'thumbnail', webpage)
'format_id': format_id,
'quality': get_quality(format_id),
})
- self._sort_formats(formats)
info['formats'] = formats
return info
ext_url = external.get('code') if service.lower() == 'youtube' else None
return self.url_result(ext_url or external['uri'])
- self._sort_formats(formats)
-
thumbnail = playerData.get('thumb') or self._og_search_property('image', webpage)
if thumbnail:
# trim thumbnail resize parameters
'ext': ext,
})
urls.append(format_url)
- self._sort_formats(formats)
return {
'id': display_id,
rtmp_url = re.sub(r'^rmtp', 'rtmp', rtmp_url)
rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
- self._sort_formats(formats)
is_live = 'stream/live' in rtmp_url
}).encode(), headers=headers)['tokens']['1']['cdn']
formats = self._extract_m3u8_formats(
stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': 'http' + ('-%s' % label if label else ''),
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
'url': video_url,
'ext': 'mp4',
}]
- self._sort_formats(formats)
videos.append({
'id': url_basename(webpage_url),
'webpage_url': update_url_query(webpage_url, {'single': True}),
'preference': -10,
})
formats.append(fmt)
- self._sort_formats(formats)
title = remove_start(self._og_search_title(webpage), 'TéléMB : ')
description = self._html_search_regex(
redirect_url + '?format=redirect&manifest=m3u&format=redirect&Tracking=true&Embedded=true&formats=MPEG4'),
video_id, 'Processing m3u8').geturl()
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
- self._sort_formats(formats)
date = unified_timestamp(try_get(
metadata, lambda x: x['props']['initialState']['video']['associatedPlaylists'][0]['videos'][0]['datePublished'].split(' ', 1)[1]))
return {
formats.extend(fmts)
self._merge_subtitles(subs, native_subtitles, target=subtitles)
- self._sort_formats(formats)
return formats, subtitles
def _get_clean_title(self, title):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': self._generic_title('', webpage),
if '10play-not-in-oz' in m3u8_url:
self.raise_geo_restricted(countries=['AU'])
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
- self._sort_formats(formats)
return {
'formats': formats,
r'(<div[^>]*\bdata-controller="player"[^>]*>)', webpage, 'video player'))
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
player_attrs['data-player-source-value'], video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': video_id,
smil_url = self._sign_url(smil_url, sig['key'], sig['secret'])
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
- self._sort_formats(formats)
ret = self._extract_theplatform_metadata(path, video_id)
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
formats.extend(cur_formats)
subtitles = self._merge_subtitles(subtitles, cur_subtitles)
- self._sort_formats(formats)
-
thumbnails = [{
'url': thumbnail['plfile$url'],
'width': int_or_none(thumbnail.get('plfile$width')),
if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
- self._sort_formats(formats)
channel = try_get(info, lambda x: x['user']['username']) # using this field instead of channel_id due to capitalization
m3u8_playlist = try_get(info, lambda x: x['video_urls'][0]['url'])
formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
return {
'id': video_id,
'url': variant_url,
'format_id': variant_id,
})
- self._sort_formats(formats)
cc_url = video_data.get('cc_url')
'vcodec': 'none' if height == 0 else None,
'width': int(height * aspect) if height and aspect else None,
})
- # It seems like this would be correctly handled by default
- # However, unless someone can confirm this, the old
- # behaviour is being kept as-is
- self._sort_formats(formats, ('res', 'source_preference'))
for subtitle in (config.get('subtitles') or []):
src = subtitle.get('src')
'is_live': live,
'formats': formats,
'subtitles': subtitles,
+ # It seems like this would be correctly handled by default.
+ # However, unless someone can confirm this, the old
+ # behaviour is being kept as-is
+ '_format_sort_fields': ('res', 'source_preference')
}
'quality': 11,
'format_note': 'Original file',
})
- self._sort_formats(formats)
return {
'id': id,
'title': data_json.get('title') or data_json.get('root_title'),
if auth_cookie:
for f in formats:
self._set_cookie(compat_urllib_parse_urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value)
- self._sort_formats(formats, ('quality', 'codec', 'size', 'br'))
thumbnails = []
for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
'availability': self._availability(
is_private='Private' in labels,
needs_subscription='Friends only' in labels,
- is_unlisted='Followers only' in labels)
+ is_unlisted='Followers only' in labels),
+ '_format_sort_fields': ('quality', 'codec', 'size', 'br'),
}
def _parse_aweme_video_web(self, aweme_detail, webpage_url):
'height': height,
})
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
thumbnails = []
for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'):
def extract_field(pattern, name):
return self._html_search_regex(pattern, webpage, name, default=None) if pattern else None
- self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
and meta.get('Key') == 'Encryption' and meta.get('Value') == '1'):
self.report_drm(video_id)
# Most likely because geo-blocked if no formats and no DRM
- self._sort_formats(formats)
thumbnails = []
for picture in info.get('Pictures', []):
description = remove_end(description, 'Category')
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
formats.extend(self._extract_m3u8_formats(
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
comment_count = int_or_none(video_info.get('comment_count'))
'tbr': stream_info.get('bitrate'),
'http_headers': self._HEADERS,
})
- self._sort_formats(formats)
info = {
'id': program_id,
'url': play_url,
'http_headers': self._HEADERS,
})
- self._sort_formats(formats)
category = vod_info.get('categoryName')
get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
id = episode_info.get('id')
formats = list(self._extract_formats(
traverse_obj(episode_info, ('mediapackage', 'media', 'track')), id))
- self._sort_formats(formats)
title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle')
series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle'))
elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed
raise ExtractorError('This content is currently unavailable', expected=True)
- self._sort_formats(formats)
-
thumbnails = []
for thumbnail_url in video_data.get('thumbnails', []):
if not thumbnail_url:
'height': int_or_none(
media_json.get('height') or self._og_search_property('video:height', webpage, default=None)),
}]
- self._sort_formats(formats)
# the url we're extracting from might be an original post or it might be a reblog.
# if it's a reblog, og:description will be the reblogger's comment, not the uploader's.
'source_preference': reliability,
'format_note': format_note,
})
- self._sort_formats(formats)
return {
'id': content_id,
formats = self._parse_jwplayer_formats(
details['player']['sources'], video_id)
- self._sort_formats(formats)
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
'url': child.text,
'quality': get_quality(quality),
})
- self._sort_formats(formats)
return {
'id': video_id,
else:
f['tbr'] = int(mobj.group(1))
formats.append(f)
- self._sort_formats(formats)
for source in video_data.findall('closedCaptions/source'):
for track in source.findall('track'):
'start_time': start_time,
'end_time': start_time + chapter_duration,
})
- self._sort_formats(formats)
return {
'formats': formats,
})
if not formats and data.get('drmProtected'):
self.report_drm(video_id)
- self._sort_formats(formats)
thumbnails = [{
'id': type,
})
if not formats and data.get('drmProtected'):
self.report_drm(video_id)
- self._sort_formats(formats)
thumbnails = [{
'id': thumbnail.get('@type'),
self._search_json(
r'var\s*vPlayConfig\s*=\s*', webpage, 'thumbnail',
video_id, default=None, transform_source=js_to_json), 'poster')
- self._sort_formats(formats)
return {
'id': video_id,
'formats': formats,
formats.append({
'url': src,
})
- self._sort_formats(formats)
return {
'id': video_id,
video_json = self._download_json(video_json_url, video_id)
m3u8_url = self._proto_relative_url(traverse_obj(video_json, ('bitrates', 'hls')))
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
if not formats and info.get('is_geo_restricted'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
'url': v_url,
'format_id': video_format,
})
- self._sort_formats(formats)
metadata = self._parse_json(
vpl_data['data-metadata'], display_id)
'height': int_or_none(info.get('height')),
'tbr': int_or_none(info.get('bitrate')),
})
- self._sort_formats(formats)
return {
'id': video_id,
'height': int_or_none(height),
'filesize': filesize,
})
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': format_id,
'height': int_or_none(format_id.rstrip('p')),
})
- self._sort_formats(formats)
description = self._og_search_description(webpage, default=None)
thumbnail = self._og_search_thumbnail(
stream_urls.add(stream_url)
formats.extend(self._extract_m3u8_formats(
stream_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
# better support for radio streams
if title.startswith('VOV'):
if not info.get('free', True):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)
- self._sort_formats(formats)
description = info.get('articleLong') or info.get('articleShort')
timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
if not info.get('free', True):
raise ExtractorError(
'Video %s is not available for free' % video_id, expected=True)
- self._sort_formats(formats)
description = source.get('description')
thumbnail = url_or_none(source.get('poster'))
continue
formats.extend(formats_)
self._merge_subtitles(subs_, target=subs)
- self._sort_formats(formats)
return formats, subs
def _real_extract(self, url):
'height': int_or_none(traverse_obj(file, ('quality', 'height'))),
})
- self._sort_formats(formats)
-
title = dict_get(info, ('subtitle', 'title', 'seoTitle'))
description = dict_get(info, ('description', 'seoDescription'))
thumbnails = []
'This content might not be available in your country due to copyright reasons',
metadata_available=True)
- self._sort_formats(formats)
-
# TODO: webvtt in m3u8
subtitles = {}
sami_path = video.get('sami_path')
raise
formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_href, guid, 'mp4')
- self._sort_formats(formats)
episode = program.get('episode') or {}
return {
'id': guid,
urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
thumbnails = set(traverse_obj(
data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none))
raise
formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4')
- self._sort_formats(formats)
return {
'id': resource_id,
'height': height,
'ext': ext,
})
- self._sort_formats(formats)
return {
'id': video_id,
'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
'quality': quality,
} for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
- self._sort_formats(formats)
description = video.get('lead')
thumbnail = video.get('thumbnail')
'protocol': 'websocket_frag',
})
- self._sort_formats(formats, ('source',))
-
infodict = {
- 'formats': formats
+ 'formats': formats,
+ '_format_sort_fields': ('source', ),
}
elif len(m3u8_urls) == 1:
formats = self._extract_m3u8_formats(
m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS)
- self._sort_formats(formats)
infodict = {
# No problem here since there's only one manifest
'formats': formats,
'quality': 10,
'format_note': 'Source',
})
- self._sort_formats(formats)
def _download_base_gql(self, video_id, ops, note, fatal=True):
headers = {
'height': int_or_none(option.get('quality')),
'fps': int_or_none(option.get('frameRate')),
})
- self._sort_formats(formats)
thumbnails = []
for thumbnail_id in ('tiny', 'small', 'medium'):
fmts, subs = self._extract_variant_formats(variant, twid)
subtitles = self._merge_subtitles(subtitles, subs)
formats.extend(fmts)
- self._sort_formats(formats, ('res', 'br', 'size', 'proto')) # The codec of http formats are unknown
thumbnails = []
media_url = media.get('media_url_https') or media.get('media_url')
'subtitles': subtitles,
'thumbnails': thumbnails,
'duration': float_or_none(video_info.get('duration_millis'), 1000),
+ # The codecs of http formats are unknown
+ '_format_sort_fields': ('res', 'br', 'size', 'proto'),
}
def extract_from_card_info(card):
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
- self._sort_formats(formats)
thumbnails = []
for suffix in ('_small', '', '_large', '_x_large', '_original'):
if f.get('url'):
formats.append(f)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
})
formats.append(a_format)
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
if not formats:
for format_id in (867, 836, 940):
add_m3u8_format(format_id)
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': format_id,
'quality': quality(format_id)
})
- self._sort_formats(formats)
title = self._html_search_regex(
r'<title>UTV - (.*?)</', webpage, 'title')
'url': f_url,
'quality': quality(format_id),
})
- self._sort_formats(formats)
tags = []
for tag in video_data.get('tags', []):
if session_id:
for f in formats:
f['extra_param_to_segment_url'] = 'pbs=' + session_id
- self._sort_formats(formats)
asset = self._download_json('http://content.uplynk.com/player/assetinfo/%s.json' % path, display_id)
if asset.get('error') == 1:
raise ExtractorError('% said: %s' % (self.IE_NAME, asset['msg']), expected=True)
'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'],
'quality': 3 if f['FileType'] == 'mp3' else 2,
} for f in s['Files']]
- self._sort_formats(formats)
e = {
'id': '%d-%s' % (s['BandId'], s['$id']),
'title': s['Title'],
formats.extend(self._extract_wowza_formats(
'http://%s/%splaylist.m3u8' % (host, file_http),
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
- self._sort_formats(formats)
subtitles = {}
formats.extend(self._parse_segmented_mp4(dash_streams))
'''
- self._sort_formats(formats)
-
description = video.get('description')
timestamp = int_or_none(video.get('created_at'))
duration = float_or_none(video.get('length'))
} for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')]
formats = extract('video')
- self._sort_formats(formats)
webpage = self._download_webpage(url, display_id)
'width': int_or_none(quality.get('width')),
'height': height,
})
- self._sort_formats(formats)
thumbnails = []
for image in video_data.get('images', []):
'format_id': format_key.split('_')[1],
'height': int(format_key.split('_')[1][:-1]),
} for format_key, format_url in videos_json.items() if url_or_none(format_url)]
- self._sort_formats(formats)
thumbnail = url_or_none(dict_get(json_data, ('cover_image_url', 'preview_image_url')))
return {
'id': video_id,
'vbr': int_or_none(fmt.get('bit_rate'), scale=1000),
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': str_or_none(metadata.get('title')),
'quality': q(f_id),
'url': f_url,
})
- self._sort_formats(formats)
categories = metadata.get('categoryPath')
if not categories:
'width': int(m.group('width')),
'height': int(m.group('height')),
})
- self._sort_formats(formats)
track = video_info['title']
if featured_artist:
raise self.raise_geo_restricted(
countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
- self._sort_formats(info['formats'])
-
info.update({
'id': video_id,
'title': data['title'],
video_data = preplay['video']
formats = self._extract_m3u8_formats(
preplay['playURL'], video_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
episode = video_data.get('episode') or {}
channel = video_data.get('channel') or {}
season = video_data.get('season') or {}
f['format_id'] = format_id + '-html5'
f['source_preference'] = 0
formats.append(f)
- self._sort_formats(formats)
categories = [
t.get('text') for t in data.get('tags', []) if 'text' in t]
'height': int_or_none(source.get('height')),
})
formats.append(f)
- self._sort_formats(formats)
thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
raise
formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
- self._sort_formats(formats)
return {
'id': video_id,
if error in ('Данное видео недоступно для просмотра на территории этой страны', 'Данное видео доступно для просмотра только на территории России'):
self.raise_geo_restricted(countries=['RU'], metadata_available=True)
self.raise_no_formats(error, expected=True)
- self._sort_formats(formats)
return {
'id': video_id,
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
})
- self._sort_formats(formats)
return {
'id': video_id,
formats, subs = self._extract_m3u8_formats_and_subtitles(
hls_url, display_id, 'mp4', 'm3u8_native')
- self._sort_formats(formats)
-
get_first = lambda x: try_get(data, lambda y: y[x + 's'][0], dict) or {}
channel = get_first('channel')
user = get_first('user')
if stream_meta.get('stream_url'):
formats.extend(self._extract_m3u8_formats(
stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))
- self._sort_formats(formats)
return {
'id': video_id,
'format_id': f'{height}p',
'height': height,
})
- self._sort_formats(formats)
title = self._search_regex(
(r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
'url': sub_url,
})
- self._sort_formats(formats)
return {
'id': film_id,
'title': title,
smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id)
smil = self._download_smil(smil_url, lecture_id)
info = self._parse_smil(smil, smil_url, lecture_id)
- self._sort_formats(info['formats'])
info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id)
info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id)
if multipart:
# Modify the URL to get 1080p
mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
formats = self._extract_mpd_formats(mpd_url, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
def _set_vimeo_cookie(self, name, value):
self._set_cookie('vimeo.com', name, value)
- def _vimeo_sort_formats(self, formats):
- # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
- # at the same time without actual units specified.
- self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source'))
-
def _parse_config(self, config, video_id):
video_data = config['video']
video_title = video_data.get('title')
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
+ # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
+ # at the same time without actual units specified.
+ '_format_sort_fields': ('quality', 'res', 'fps', 'hdr:12', 'source'),
}
def _extract_original_format(self, url, video_id, unlisted_hash=None):
})
info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id)
- self._vimeo_sort_formats(info['formats'])
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
info.update({
'description': video.get('description'),
if config.get('view') == 4:
config = self._verify_player_video_password(
redirect_url, video_id, headers)
- info = self._parse_config(config, video_id)
- self._vimeo_sort_formats(info['formats'])
- return info
+ return self._parse_config(config, video_id)
if re.search(r'<form[^>]+?id="pw_form"', webpage):
video_password = self._get_video_password()
info_dict_config = self._parse_config(config, video_id)
formats.extend(info_dict_config['formats'])
- self._vimeo_sort_formats(formats)
+ info_dict['_format_sort_fields'] = info_dict_config['_format_sort_fields']
json_ld = self._search_json_ld(webpage, video_id, default={})
page_url + '/action', video_id)
if source_format:
info_dict['formats'].append(source_format)
- self._vimeo_sort_formats(info_dict['formats'])
info_dict['description'] = clean_html(clip_data.get('description'))
return info_dict
config = self._download_json(config_url, video_id)
info = self._parse_config(config, video_id)
info['id'] = video_id
- self._vimeo_sort_formats(info['formats'])
return info
formats, subs = self._extract_m3u8_formats_and_subtitles(
f'https://www.vimm.tv/hls/{channel_id}.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True)
- self._sort_formats(formats)
return {
'id': channel_id,
formats, subs = self._extract_m3u8_formats_and_subtitles(
f'https://d211qfrkztakg3.cloudfront.net/{channel_id}/{video_id}/index.m3u8', video_id, 'mp4', m3u8_id='hls', live=False)
- self._sort_formats(formats)
return {
'id': video_id,
formats = [{
'url': f['url'],
} for f in playlist['video']]
- self._sort_formats(formats)
return {
'id': video_id,
'quality': quality,
})
self._check_formats(formats, video_id)
- self._sort_formats(formats)
username = data.get('username')
'vcodec': 'none' if is_audio else None,
})
formats.append(f)
- self._sort_formats(formats)
duration = int_or_none(data.get('duration'))
# r'\1whe\2', video_data['href'])
m3u8_url = video_data['href']
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
- self._sort_formats(formats)
for key, value in video_data.items():
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
'ext': 'mp4',
'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int)
})
- self._sort_formats(formats)
subtitles = {}
for sub in video_data.get('subtitle') or []:
'url': format_url,
'ext': 'flv',
})
- self._sort_formats(formats)
subtitles = {}
for sub in data.get('subs') or {}:
'old/v3/live/%s/playInfo',
video_id)['result']['adaptiveStreamUrl']
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
- self._sort_formats(formats)
info = get_common_fields()
info.update({
'title': video['title'],
'url': f_url,
'height': int_or_none(f_id[:-1]),
})
- self._sort_formats(formats)
entry = {
'formats': formats,
'id': video_id,
formats = self._extract_wowza_formats(
hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
- self._sort_formats(formats)
return {
'id': video_id,
'ext': determine_ext(talk_url) or format_id,
'vcodec': 'none',
} for format_id, talk_url in talk['media_links'].items()]
- self._sort_formats(formats)
return {
'id': compat_str(talk.get('id') or display_id),
'acodec': 'mp3',
'vcodec': 'none',
}]
- self._sort_formats(formats)
return {
'id': compat_str(entry.get('ArticleId')),
'title': entry.get('ArticleTitle'),
formats = self._extract_m3u8_formats(
'https://cdnapisec.kaltura.com/p/1982551/playManifest/pt/https/f/applehttp/t/web/e/' + entry_id,
video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
description, series, season_number, episode, episode_number = [None] * 5
'tbr': int_or_none(tbr),
})
if formats:
- self._sort_formats(formats)
info['formats'] = formats
info['duration'] = int_or_none(asset.get('duration'))
return info
continue
if provider_video_type == 'brightcove':
info['formats'] = self._extract_once_formats(provider_video_id)
- self._sort_formats(info['formats'])
else:
info.update({
'_type': 'url_transparent',
formats.extend(self._extract_vrv_formats(
stream.get('url'), video_id, stream_type.split('_')[1],
audio_locale, stream.get('hardsub_locale')))
- self._sort_formats(formats)
subtitles = {}
for k in ('captions', 'subtitles'):
url, '<video>%s</video>' % self._extract_packed(webpage),
video_id)[0]
- self._sort_formats(info['formats'])
-
info.update({
'id': video_id,
'title': title,
metadata_from_url(embed_code)
if not is_youtube:
- self._sort_formats(formats)
info['formats'] = formats
metadata_from_url(video_data.get('thumbnail'))
f['_decryption_key_url'] = url_templ % ('goose', '') + qs
formats.extend(m3u8_formats)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
if m:
fmt['height'] = int(m.group('height'))
formats.append(fmt)
- self._sort_formats(formats)
return {
'id': video_id,
media_url, is_live = self._get_media_url(media_meta)
video_id = media.get('media_id') or container.get('media_container_id')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': str(video_id),
'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip')
clip_data = clip.get('clip_data')
formats, subtitles = self._extract_m3u8_formats_and_subtitles(clip_data.get('url'), video_id=clip_id, ext='mp4')
- self._sort_formats(formats)
return {
'id': clip_id,
'title': clip.get('clip_title') or self._og_search_title(self._download_webpage(url, clip_id, fatal=False)),
if manifest_urls:
extract_formats(manifest_urls)
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': title,
'height': int_or_none(item.get('height')),
'tbr': int_or_none(item.get('bitrate')),
})
- self._sort_formats(formats)
description = strip_or_none(item.get('descr'))
thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail')
a_format['ext'] = ext
formats.append(a_format)
- self._sort_formats(formats)
-
caption_url = media_resource.get('captionURL')
if caption_url:
subtitles['de'] = [{
'format_note': track.get('title'),
})
formats.extend(m3u8_formats)
- self._sort_formats(formats)
thumbnail = xpath_text(video, './/image', 'thumbnail')
'play_path': play_path,
}]
- self._sort_formats(formats)
-
return {
'id': story_id,
'title': title,
'height': res,
})
- self._sort_formats(formats)
-
uploader = self._og_search_property(
'nick-name', webpage, 'uploader', default=None)
formats.extend(self._extract_m3u8_formats(
hls_url, video_id, ext='mp4', m3u8_id='hls'))
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], compat_str)
if uploader_url:
raise ExtractorError('No videos found')
formats = self._extract_m3u8_formats(video['secureurl'], video_id, 'mp4')
- self._sort_formats(formats)
return {
'id': str(video.get('content_id')),
})
json = json.get('resource')
thumb = self._generate_thumbnail(json.get('thumbnailId'))
- self._sort_formats(formats)
return {
'id': video_id,
})
formats.append(f)
- self._sort_formats(formats)
-
subtitles = {}
for caption in data.get('captions', []):
language = caption.get('language')
random.choice(fmt['url']),
video_id, live=True))
- self._sort_formats(formats)
-
channel['formats'] = formats
return channel
'height': int_or_none(v.get('height')),
'fps': float_or_none(v.get('fps')),
})
- self._sort_formats(formats)
return {
'id': video_id,
'url': video_url,
'format_id': 'sd',
})
- self._sort_formats(formats)
thumbnail = self._search_regex(
[
'Referer': standard_url,
},
})
- self._sort_formats(formats)
categories_list = video.get('categories')
if isinstance(categories_list, list):
'url': video_url,
})
- self._sort_formats(formats)
-
# Only a few videos have a description
mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
description = mobj.group(1) if mobj else None
'ext': 'mp4',
} for prog in v if prog.get('url') or []])
- self._sort_formats(formats)
-
return {
'id': video_id,
'title': data.get('title'),
'format_id': format_id,
'quality': -1 if format_id == 'low' else 0,
})
- self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage, default=None) or get(
'ThumbUrl', fatal=False) or get('ThumbUrl169', fatal=False)
'url': media_url,
'tbr': tbr,
})
- self._sort_formats(formats)
link = find_xpath_attr(
entry, xpath_with_ns('./atom:link', NS_MAP), 'rel', 'original')
})
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
if not title:
title = self._search_regex(
'format_id': format_id,
'height': int(format_id) if format_id.isnumeric() else None,
})
- self._sort_formats(formats)
timestamp = media_info.get('PUBLISH_DATETIME')
if timestamp:
'quality': -2 if format_id.endswith('low') else None,
})
- self._sort_formats(formats)
-
return {
'id': video_id,
'formats': formats,
if not formats and msg == 'geo restricted':
self.raise_geo_restricted(metadata_available=True)
- self._sort_formats(formats)
-
thumbnails = []
for thumb in video.get('thumbnails', []):
thumb_url = thumb.get('url')
'tbr': int_or_none(vid.get('bitrate')),
})
self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
return formats
'url': format_url,
'width': int_or_none(size.get('width')),
})
- self._sort_formats(formats)
uid = resource.get('uid')
display_name = try_get(store, lambda x: x['users'][uid]['displayName'])
else:
formats.append({'url': content_url})
- self._sort_formats(formats)
-
timestamp = (int_or_none(content.get('release_date'))
or int_or_none(content.get('release_date_ut'))
or int_or_none(content.get('start_time')))
formats.extend(self._extract_mpd_formats(s_url, id, mpd_id='dash'))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(s_url, id, 'mp4'))
- self._sort_formats(formats)
return {
'id': video_id,
'title': video_json.get('title') or self._og_search_title(webpage),
'quality': quality_key(format_id),
'height': hd_height if is_hd else None,
})
- self._sort_formats(formats)
return {
'id': video_id,
'ext': 'mp4',
'tbr': format_info.get('bitrate'),
} for format_info in info['videoUrlModels']]
- self._sort_formats(formats)
return {
'id': video_id,
if m:
title = m.group('title')
formats = self._extract_f4m_formats(f4m_url, video_id)
- self._sort_formats(formats)
return {
'id': video_id,
'width': stream.get('width'),
'height': stream.get('height'),
} for stream in data['stream'] if stream.get('channel_type') != 'tail']
- self._sort_formats(formats)
return {
'id': video_id,
})
f['height'] = height
formats.append(f)
- self._sort_formats(formats)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
formats.extend(self._extract_storyboard(player_responses, duration))
- # source_preference is lower for throttled/potentially damaged formats
- self._sort_formats(formats, (
- 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
-
info = {
'id': video_id,
'title': video_title,
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'live_status': live_status,
'release_timestamp': live_start_time,
+ '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
+ 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
}
subtitles = {}
if m:
f['height'] = int(m.group('height'))
formats.append(f)
- self._sort_formats(formats)
return {
'id': video_id,
for this_format in this_formats:
this_format['quality'] = preference
formats.extend(this_formats)
- self._sort_formats(formats)
return formats, subtitles
def _extract_video(self, video_id, record_id=None):
'class': track.get('class'),
'language': track.get('language'),
})
- self._sort_formats(formats, ('tbr', 'res', 'quality', 'language_preference'))
duration = float_or_none(try_get(
ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
'duration': duration,
'formats': formats,
'subtitles': self._extract_subtitles(ptmd),
+ '_format_sort_fields': ('tbr', 'res', 'quality', 'language_preference'),
}
def _extract_player(self, webpage, video_id, fatal=True):
format_urls = set()
for f in formitaeten or []:
self._extract_format(content_id, formats, format_urls, f)
- self._sort_formats(formats)
thumbnails = []
teaser_bild = document.get('teaserBild')
if not asset_data.get('hls_url'):
self.raise_login_required(self._LOGIN_HINT, metadata_available=True, method=None)
formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(asset_data['hls_url'], video_id, 'mp4', fatal=False)
- self._sort_formats(formats)
subtitles = {}
for sub in asset_data.get('subtitle_url', []):
raise ExtractorError('No video found', expected=True)
formats = self._extract_m3u8_formats(embed_url, content_id, 'mp4')
- self._sort_formats(formats)
return {
**self._json_ld(json_ld_list, display_id),
'url': play_url,
'width': int_or_none(q.get('width')),
})
- self._sort_formats(formats)
author = zvideo.get('author') or {}
url_token = author.get('url_token')
if not formats and item.get('msg') == 'Sorry, this content is not available in your country.':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
- self._sort_formats(formats)
lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file')
'preference': -1
})
- self._sort_formats(formats)
-
return {
'id': play_id,
'title': data.get('topic'),
if text_tracks:
text_tracks = self._parse_json(
text_tracks, video_id, js_to_json, False)
- self._sort_formats(formats)
if text_tracks:
for text_track in text_tracks: