_pps = []
_download_retcode = None
_num_downloads = None
+ _playlist_level = 0
+ _playlist_urls = set()
_screen_file = None
def __init__(self, params=None, auto_init=True):
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type in ('playlist', 'multi_video'):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1) - 1
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
-
- ie_entries = ie_result['entries']
-
- def make_playlistitems_entries(list_ie_entries):
- num_entries = len(list_ie_entries)
- return [
- list_ie_entries[i - 1] for i in playlistitems
- if -num_entries <= i - 1 < num_entries]
-
- def report_download(num_entries):
+ # Protect from infinite recursion due to recursively nested playlists
+ # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+ webpage_url = ie_result['webpage_url']
+ if webpage_url in self._playlist_urls:
self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, num_entries))
-
- if isinstance(ie_entries, list):
- n_all_entries = len(ie_entries)
- if playlistitems:
- entries = make_playlistitems_entries(ie_entries)
- else:
- entries = ie_entries[playliststart:playlistend]
- n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
- (ie_result['extractor'], playlist, n_all_entries, n_entries))
- elif isinstance(ie_entries, PagedList):
- if playlistitems:
- entries = []
- for item in playlistitems:
- entries.extend(ie_entries.getslice(
- item - 1, item
- ))
- else:
- entries = ie_entries.getslice(
- playliststart, playlistend)
- n_entries = len(entries)
- report_download(n_entries)
- else: # iterable
- if playlistitems:
- entries = make_playlistitems_entries(list(itertools.islice(
- ie_entries, 0, max(playlistitems))))
- else:
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
- n_entries = len(entries)
- report_download(n_entries)
-
- if self.params.get('playlistreverse', False):
- entries = entries[::-1]
-
- if self.params.get('playlistrandom', False):
- random.shuffle(entries)
-
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
-
- for i, entry in enumerate(entries, 1):
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
- 'n_entries': n_entries,
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
-
- if self._match_entry(entry, incomplete=True) is not None:
- continue
'[download] Skipping already downloaded playlist: %s'
% (ie_result.get('title') or ie_result.get('id')))
+ return
- entry_result = self.__process_iterable_entry(entry, download, extra)
- # TODO: skip failed (empty) entries?
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
- self.to_screen('[download] Finished downloading playlist: %s' % playlist)
- return ie_result
+ self._playlist_level += 1
+ self._playlist_urls.add(webpage_url)
+ try:
+ return self.__process_playlist(ie_result, download)
+ finally:
+ self._playlist_level -= 1
+ if not self._playlist_level:
+ self._playlist_urls.clear()
elif result_type == 'compat_list':
self.report_warning(
'Extractor %s returned a compat_list result. '
else:
raise Exception('Invalid result type: %s' % result_type)
+ def __process_playlist(self, ie_result, download):
+ """Process a 'playlist'/'multi_video' ie_result: select the requested
+ entries, process each one, and return ie_result with its 'entries'
+ replaced by the per-entry results."""
+ # We process each entry in the playlist
+ playlist = ie_result.get('title') or ie_result.get('id')
+ self.to_screen('[download] Downloading playlist: %s' % playlist)
+
+ playlist_results = []
+
+ # --playlist-start is 1-based on the command line; convert to a 0-based slice index.
+ playliststart = self.params.get('playliststart', 1) - 1
+ playlistend = self.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlistend == -1:
+ playlistend = None
+
+ # --playlist-items: comma-separated 1-based indices and inclusive
+ # "start-end" ranges (e.g. "1,3,5-7"), deduplicated preserving order.
+ playlistitems_str = self.params.get('playlist_items')
+ playlistitems = None
+ if playlistitems_str is not None:
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+
+ ie_entries = ie_result['entries']
+
+ def make_playlistitems_entries(list_ie_entries):
+ # Pick the requested 1-based indices, silently dropping any that
+ # fall outside the list.
+ num_entries = len(list_ie_entries)
+ return [
+ list_ie_entries[i - 1] for i in playlistitems
+ if -num_entries <= i - 1 < num_entries]
+
+ def report_download(num_entries):
+ self.to_screen(
+ '[%s] playlist %s: Downloading %d videos' %
+ (ie_result['extractor'], playlist, num_entries))
+
+ # Entries may be a plain list, a lazily fetched PagedList, or any
+ # other iterable (e.g. a generator); slice whichever form we got.
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ if playlistitems:
+ entries = make_playlistitems_entries(ie_entries)
+ else:
+ entries = ie_entries[playliststart:playlistend]
+ n_entries = len(entries)
+ self.to_screen(
+ '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+ (ie_result['extractor'], playlist, n_all_entries, n_entries))
+ elif isinstance(ie_entries, PagedList):
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
+ n_entries = len(entries)
+ report_download(n_entries)
+ else: # iterable
+ if playlistitems:
+ entries = make_playlistitems_entries(list(itertools.islice(
+ ie_entries, 0, max(playlistitems))))
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ report_download(n_entries)
+
+ if self.params.get('playlistreverse', False):
+ entries = entries[::-1]
+
+ if self.params.get('playlistrandom', False):
+ random.shuffle(entries)
+
+ x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+ for i, entry in enumerate(entries, 1):
+ self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+ # This __x_forwarded_for_ip thing is a bit ugly but requires
+ # minimal changes
+ if x_forwarded_for:
+ entry['__x_forwarded_for_ip'] = x_forwarded_for
+ # Playlist-level metadata inherited by each entry's info dict.
+ extra = {
+ 'n_entries': n_entries,
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+
+ # _match_entry returns a non-None reason when the entry should be skipped.
+ if self._match_entry(entry, incomplete=True) is not None:
+ continue
+
+ entry_result = self.__process_iterable_entry(entry, download, extra)
+ # TODO: skip failed (empty) entries?
+ playlist_results.append(entry_result)
+ ie_result['entries'] = playlist_results
+ self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+ return ie_result
+
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..compat import (
+ compat_HTTPError,
compat_b64decode,
compat_ord,
)
bytes_to_long,
ExtractorError,
float_or_none,
+ int_or_none,
intlist_to_bytes,
long_to_bytes,
pkcs1pad,
strip_or_none,
- urljoin,
+ try_get,
+ unified_strdate,
)
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
_TEST = {
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
- 'md5': 'e497370d847fd79d9d4c74be55575c7a',
+ 'md5': '0319c99885ff5547565cacb4f3f9348d',
'info_dict': {
'id': '7778',
'ext': 'mp4',
- 'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
+ 'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
+ 'series': 'Blue Exorcist - Kyôto Saga',
+ 'duration': 1467,
+ 'release_date': '20170106',
+ 'comment_count': int,
+ 'average_rating': float,
+ 'season_number': 2,
+ 'episode': 'Début des hostilités',
+ 'episode_number': 1,
}
}
+
_BASE_URL = 'http://animedigitalnetwork.fr'
- _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
+ _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
+ _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
+ _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
_POS_ALIGN_MAP = {
'start': 1,
'end': 3,
def _ass_subtitles_timecode(seconds):
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
- def _get_subtitles(self, sub_path, video_id):
- if not sub_path:
+ def _get_subtitles(self, sub_url, video_id):
+ if not sub_url:
return None
enc_subtitles = self._download_webpage(
- urljoin(self._BASE_URL, sub_path),
- video_id, 'Downloading subtitles location', fatal=False) or '{}'
+ sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
if subtitle_location:
enc_subtitles = self._download_webpage(
- urljoin(self._BASE_URL, subtitle_location),
- video_id, 'Downloading subtitles data', fatal=False,
- headers={'Origin': 'https://animedigitalnetwork.fr'})
+ subtitle_location, video_id, 'Downloading subtitles data',
+ fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
- bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
+ bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- player_config = self._parse_json(self._search_regex(
- r'playerConfig\s*=\s*({.+});', webpage,
- 'player config', default='{}'), video_id, fatal=False)
- if not player_config:
- config_url = urljoin(self._BASE_URL, self._search_regex(
- r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
- webpage, 'config url'))
- player_config = self._download_json(
- config_url, video_id,
- 'Downloading player config JSON metadata')['player']
-
- video_info = {}
- video_info_str = self._search_regex(
- r'videoInfo\s*=\s*({.+});', webpage,
- 'video info', fatal=False)
- if video_info_str:
- video_info = self._parse_json(
- video_info_str, video_id, fatal=False) or {}
-
- options = player_config.get('options') or {}
- metas = options.get('metas') or {}
- links = player_config.get('links') or {}
- sub_path = player_config.get('subtitles')
- error = None
- if not links:
- links_url = player_config.get('linksurl') or options['videoUrl']
- token = options['token']
- self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
- message = bytes_to_intlist(json.dumps({
- 'k': self._K,
- 'e': 60,
- 't': token,
- }))
+ video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
+ player = self._download_json(
+ video_base_url + 'configuration', video_id,
+ 'Downloading player config JSON metadata')['player']
+ options = player['options']
+
+ user = options['user']
+ if not user.get('hasAccess'):
+ raise ExtractorError(
+ 'This video is only available for paying users', expected=True)
+ # self.raise_login_required() # FIXME: Login is not implemented
+
+ token = self._download_json(
+ user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
+ video_id, 'Downloading access token', headers={
+ 'x-player-refresh-token': user['refreshToken']
+ }, data=b'')['token']
+
+ links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
+ self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
+ message = bytes_to_intlist(json.dumps({
+ 'k': self._K,
+ 't': token,
+ }))
+
+ # Sometimes authentication fails for no good reason, retry with
+ # a different random padding
+ links_data = None
+ for _ in range(3):
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
n, e = self._RSA_KEY
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode()
- links_data = self._download_json(
- urljoin(self._BASE_URL, links_url), video_id,
- 'Downloading links JSON metadata', headers={
- 'Authorization': 'Bearer ' + authorization,
- })
- links = links_data.get('links') or {}
- metas = metas or links_data.get('meta') or {}
- sub_path = sub_path or links_data.get('subtitles') or \
- 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
- sub_path += '&token=' + token
- error = links_data.get('error')
- title = metas.get('title') or video_info['title']
+
+ try:
+ links_data = self._download_json(
+ links_url, video_id, 'Downloading links JSON metadata', headers={
+ 'X-Player-Token': authorization
+ }, query={
+ 'freeWithAds': 'true',
+ 'adaptive': 'false',
+ 'withMetadata': 'true',
+ 'source': 'Web'
+ })
+ break
+ except ExtractorError as e:
+ if not isinstance(e.cause, compat_HTTPError):
+ raise e
+
+ if e.cause.code == 401:
+ # This usually goes away with a different random pkcs1pad, so retry
+ continue
+
+ error = self._parse_json(e.cause.read(), video_id)
+ message = error.get('message')
+ if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
+ self.raise_geo_restricted(msg=message)
+ else:
+ raise ExtractorError(message)
+ else:
+ raise ExtractorError('Giving up retrying')
+
+ links = links_data.get('links') or {}
+ metas = links_data.get('metadata') or {}
+ sub_url = (links.get('subtitles') or {}).get('all')
+ video_info = links_data.get('video') or {}
+ title = metas['title']
formats = []
- for format_id, qualities in links.items():
+ for format_id, qualities in (links.get('streaming') or {}).items():
if not isinstance(qualities, dict):
continue
for quality, load_balancer_url in qualities.items():
for f in m3u8_formats:
f['language'] = 'fr'
formats.extend(m3u8_formats)
- if not error:
- error = options.get('error')
- if not formats and error:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
self._sort_formats(formats)
+ video = (self._download_json(
+ self._API_BASE_URL + 'video/%s' % video_id, video_id,
+ 'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
+ show = video.get('show') or {}
+
return {
'id': video_id,
'title': title,
- 'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
- 'thumbnail': video_info.get('image'),
+ 'description': strip_or_none(metas.get('summary') or video.get('summary')),
+ 'thumbnail': video_info.get('image') or player.get('image'),
'formats': formats,
- 'subtitles': self.extract_subtitles(sub_path, video_id),
- 'episode': metas.get('subtitle') or video_info.get('videoTitle'),
- 'series': video_info.get('playlistTitle'),
+ 'subtitles': self.extract_subtitles(sub_url, video_id),
+ 'episode': metas.get('subtitle') or video.get('name'),
+ 'episode_number': int_or_none(video.get('shortNumber')),
+ 'series': show.get('title'),
+ 'season_number': int_or_none(video.get('season')),
+ 'duration': int_or_none(video_info.get('duration') or video.get('duration')),
+ 'release_date': unified_strdate(video.get('releaseDate')),
+ 'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
+ 'comment_count': int_or_none(video.get('commentsCount')),
}
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
webpage, 'anime description', default=None)
- entries = []
-
def extract_info(html, video_id, num=None):
title, description = [None] * 2
formats = []
self._sort_formats(info['formats'])
f = common_info.copy()
f.update(info)
- entries.append(f)
+ yield f
# Extract teaser/trailer only when full episode is not available
if not info['formats']:
'title': m.group('title'),
'url': urljoin(url, m.group('href')),
})
- entries.append(f)
+ yield f
def extract_episodes(html):
for num, episode_html in enumerate(re.findall(
'episode_number': episode_number,
}
- extract_entries(episode_html, video_id, common_info)
+ for e in extract_entries(episode_html, video_id, common_info):
+ yield e
def extract_film(html, video_id):
common_info = {
'title': anime_title,
'description': anime_description,
}
- extract_entries(html, video_id, common_info)
+ for e in extract_entries(html, video_id, common_info):
+ yield e
- extract_episodes(webpage)
+ def entries():
+ has_episodes = False
+ for e in extract_episodes(webpage):
+ has_episodes = True
+ yield e
- if not entries:
- extract_film(webpage, anime_id)
+ if not has_episodes:
+ for e in extract_film(webpage, anime_id):
+ yield e
- return self.playlist_result(entries, anime_id, anime_title, anime_description)
+ return self.playlist_result(
+ entries(), anime_id, anime_title, anime_description)
ExtractorError,
extract_attributes,
find_xpath_attr,
+ get_element_by_attribute,
get_element_by_class,
int_or_none,
js_to_json,
merge_dicts,
+ parse_iso8601,
smuggle_url,
+ str_to_int,
unescapeHTML,
)
from .senateisvp import SenateISVPIE
jwsetup, video_id, require_title=False, m3u8_id='hls',
base_url=url)
add_referer(info['formats'])
+ for subtitles in info['subtitles'].values():
+ for subtitle in subtitles:
+ ext = determine_ext(subtitle['url'])
+ if ext == 'php':
+ ext = 'vtt'
+ subtitle['ext'] = ext
ld_info = self._search_json_ld(webpage, video_id, default={})
- return merge_dicts(info, ld_info)
+ title = get_element_by_class('video-page-title', webpage) or \
+ self._og_search_title(webpage)
+ description = get_element_by_attribute('itemprop', 'description', webpage) or \
+ self._html_search_meta(['og:description', 'description'], webpage)
+ return merge_dicts(info, ld_info, {
+ 'title': title,
+ 'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
+ 'description': description,
+ 'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
+ 'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
+ 'duration': int_or_none(self._search_regex(
+ r'jwsetup\.seclength\s*=\s*(\d+);',
+ webpage, 'duration', fatal=False)),
+ 'view_count': str_to_int(self._search_regex(
+ r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
+ webpage, 'views', fatal=False)),
+ })
# Obsolete
# We first look for clipid, because clipprog always appears before
from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE
-from .khanacademy import KhanAcademyIE
+from .khanacademy import (
+ KhanAcademyIE,
+ KhanAcademyUnitIE,
+)
from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from __future__ import unicode_literals
-import re
+import json
from .common import InfoExtractor
from ..utils import (
- unified_strdate,
+ int_or_none,
+ parse_iso8601,
+ try_get,
)
-class KhanAcademyIE(InfoExtractor):
- _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
- IE_NAME = 'KhanAcademy'
+class KhanAcademyBaseIE(InfoExtractor):
+ _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
- _TESTS = [{
- 'url': 'http://www.khanacademy.org/video/one-time-pad',
- 'md5': '7b391cce85e758fb94f763ddc1bbb979',
+ def _parse_video(self, video):
+ """Map a Khan Academy video object onto a url_transparent result that
+ delegates actual extraction to the Youtube extractor via 'youtubeId'."""
+ return {
+ '_type': 'url_transparent',
+ 'url': video['youtubeId'],
+ 'id': video.get('slug'),
+ 'title': video.get('title'),
+ 'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
+ 'duration': int_or_none(video.get('duration')),
+ 'description': video.get('description'),
+ 'ie_key': 'Youtube',
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ # Khan Academy's internal GraphQL endpoint returns the page payload;
+ # 'contentJson' is itself a JSON-encoded string, hence the second parse.
+ # NOTE(review): 'hash' looks like a persisted-query identifier tied to
+ # this query shape — confirm against the site before changing it.
+ component_props = self._parse_json(self._download_json(
+ 'https://www.khanacademy.org/api/internal/graphql',
+ display_id, query={
+ 'hash': 1604303425,
+ 'variables': json.dumps({
+ 'path': display_id,
+ 'queryParams': '',
+ }),
+ })['data']['contentJson'], display_id)['componentProps']
+ # Subclasses turn componentProps into a video or playlist result.
+ return self._parse_component_props(component_props)
+
+
+class KhanAcademyIE(KhanAcademyBaseIE):
+ IE_NAME = 'khanacademy'
+ _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
+ _TEST = {
+ 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
+ 'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
'info_dict': {
- 'id': 'one-time-pad',
- 'ext': 'webm',
+ 'id': 'FlIG3TvQCBQ',
+ 'ext': 'mp4',
'title': 'The one-time pad',
'description': 'The perfect cipher',
'duration': 176,
'uploader': 'Brit Cruise',
'uploader_id': 'khanacademy',
'upload_date': '20120411',
+ 'timestamp': 1334170113,
+ 'license': 'cc-by-nc-sa',
},
'add_ie': ['Youtube'],
- }, {
- 'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
+ }
+
+ def _parse_component_props(self, component_props):
+ """Extract a single video from a tutorial page's content model,
+ enriching the base video result with uploader/timestamp/license."""
+ video = component_props['tutorialPageData']['contentModel']
+ info = self._parse_video(video)
+ author_names = video.get('authorNames')
+ info.update({
+ 'uploader': ', '.join(author_names) if author_names else None,
+ 'timestamp': parse_iso8601(video.get('dateAdded')),
+ 'license': video.get('kaUserLicense'),
+ })
+ return info
+
+
+class KhanAcademyUnitIE(KhanAcademyBaseIE):
+ IE_NAME = 'khanacademy:unit'
+ _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
+ _TEST = {
+ 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
'info_dict': {
'id': 'cryptography',
- 'title': 'Journey into cryptography',
+ 'title': 'Cryptography',
'description': 'How have humans protected their secret messages through history? What has changed today?',
},
- 'playlist_mincount': 3,
- }]
-
- def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('id')
+ 'playlist_mincount': 31,
+ }
- if m.group('key') == 'video':
- data = self._download_json(
- 'http://api.khanacademy.org/api/v1/videos/' + video_id,
- video_id, 'Downloading video info')
+ def _parse_component_props(self, component_props):
+ """Build a playlist covering every Video item in the unit's tutorials;
+ each tutorial is exposed as a chapter on its entries."""
+ curation = component_props['curation']
- upload_date = unified_strdate(data['date_added'])
- uploader = ', '.join(data['author_names'])
- return {
- '_type': 'url_transparent',
- 'url': data['url'],
- 'id': video_id,
- 'title': data['title'],
- 'thumbnail': data['image_url'],
- 'duration': data['duration'],
- 'description': data['description'],
- 'uploader': uploader,
- 'upload_date': upload_date,
+ entries = []
+ tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
+ for tutorial_number, tutorial in enumerate(tutorials, 1):
+ # Chapter metadata shared by all entries of this tutorial.
+ chapter_info = {
+ 'chapter': tutorial.get('title'),
+ 'chapter_number': tutorial_number,
+ 'chapter_id': tutorial.get('id'),
}
- else:
- # topic
- data = self._download_json(
- 'http://api.khanacademy.org/api/v1/topic/' + video_id,
- video_id, 'Downloading topic info')
+ # Only Video content items become playlist entries; other kinds
+ # (exercises, articles) are ignored.
+ for content_item in (tutorial.get('contentItems') or []):
+ if content_item.get('kind') == 'Video':
+ info = self._parse_video(content_item)
+ info.update(chapter_info)
+ entries.append(info)
- entries = [
- {
- '_type': 'url',
- 'url': c['url'],
- 'id': c['id'],
- 'title': c['title'],
- }
- for c in data['children'] if c['kind'] in ('Video', 'Topic')]
-
- return {
- '_type': 'playlist',
- 'id': video_id,
- 'title': data['title'],
- 'description': data['description'],
- 'entries': entries,
- }
+ # Playlist id/title/description all come from the curation payload.
+ return self.playlist_result(
+ entries, curation.get('unit'), curation.get('title'),
+ curation.get('description'))
cloudcast_url = cloudcast.get('url')
if not cloudcast_url:
continue
+ slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
+ owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
+ video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
entries.append(self.url_result(
- cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
+ cloudcast_url, MixcloudIE.ie_key(), video_id))
page_info = items['pageInfo']
has_next_page = page_info['hasNextPage']
_DESCRIPTION_KEY = 'biog'
_ROOT_TYPE = 'user'
_NODE_TEMPLATE = '''slug
- url'''
+ url
+ owner { username }'''
def _get_playlist_title(self, title, slug):
return '%s (%s)' % (title, slug)
_NODE_TEMPLATE = '''cloudcast {
slug
url
+ owner { username }
}'''
def _get_cloudcast(self, node):
'tags': ['framasoft', 'peertube'],
'categories': ['Science & Technology'],
}
+ }, {
+ # Issue #26002
+ 'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
+ 'info_dict': {
+ 'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
+ 'ext': 'mp4',
+ 'title': 'Dot matrix printer shell demo',
+ 'uploader_id': '3',
+ 'timestamp': 1587401293,
+ 'upload_date': '20200420',
+ 'uploader': 'Drew DeVault',
+ }
}, {
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
'only_matching': True,
title = video['name']
formats = []
- for file_ in video['files']:
+ files = video.get('files') or []
+ for playlist in (video.get('streamingPlaylists') or []):
+ if not isinstance(playlist, dict):
+ continue
+ playlist_files = playlist.get('files')
+ if not (playlist_files and isinstance(playlist_files, list)):
+ continue
+ files.extend(playlist_files)
+ for file_ in files:
if not isinstance(file_, dict):
continue
file_url = url_or_none(file_.get('fileUrl'))
},
}]
- _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
_GEO_COUNTRIES = ['US']
+ def _get_feed_query(self, uri):
+ """Query parameters for the mtvnservices feed request: the site's
+'arcEp' endpoint name plus the mgid being resolved."""
+ return {
+ 'arcEp': 'paramountnetwork.com',
+ 'mgid': uri,
+ }
+
def _extract_mgid(self, webpage, url):
root_data = self._parse_json(self._search_regex(
r'window\.__DATA__\s*=\s*({.+})',
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
- js_to_json,
- mimetype2ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
)
IE_DESC = '3Q SDN'
_VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TESTS = [{
- # ondemand from http://www.philharmonie.tv/veranstaltung/26/
- 'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
- 'md5': 'ab040e37bcfa2e0c079f92cb1dd7f6cd',
+ # https://player.3qsdn.com/demo.html
+ 'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
+ 'md5': '64a57396b16fa011b15e0ea60edce918',
'info_dict': {
- 'id': '0280d6b9-1215-11e6-b427-0cc47a188158',
+ 'id': '7201c779-6b3c-11e7-a40e-002590c750be',
'ext': 'mp4',
- 'title': '0280d6b9-1215-11e6-b427-0cc47a188158',
+ 'title': 'Video Ads',
'is_live': False,
+ 'description': 'Video Ads Demo',
+ 'timestamp': 1500334803,
+ 'upload_date': '20170717',
+ 'duration': 888.032,
+ 'subtitles': {
+ 'eng': 'count:1',
+ },
},
- 'expected_warnings': ['Failed to download MPD manifest', 'Failed to parse JSON'],
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
}, {
# live video stream
- 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
+ 'url': 'https://playout.3qsdn.com/66e68995-11ca-11e8-9273-002590c750be',
'info_dict': {
- 'id': 'd755d94b-4ab9-11e3-9162-0025907ad44f',
+ 'id': '66e68995-11ca-11e8-9273-002590c750be',
'ext': 'mp4',
- 'title': 're:^d755d94b-4ab9-11e3-9162-0025907ad44f [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'title': 're:^66e68995-11ca-11e8-9273-002590c750be [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True,
},
'params': {
'skip_download': True, # m3u8 downloads
},
- 'expected_warnings': ['Failed to download MPD manifest'],
}, {
# live audio stream
'url': 'http://playout.3qsdn.com/9edf36e0-6bf2-11e2-a16a-9acf09e2db48',
# live video with rtmp link
'url': 'https://playout.3qsdn.com/6092bb9e-8f72-11e4-a173-002590c750be',
'only_matching': True,
+ }, {
+ # ondemand from http://www.philharmonie.tv/veranstaltung/26/
+ 'url': 'http://playout.3qsdn.com/0280d6b9-1215-11e6-b427-0cc47a188158?protocol=http',
+ 'only_matching': True,
+ }, {
+ # live video stream
+ 'url': 'https://playout.3qsdn.com/d755d94b-4ab9-11e3-9162-0025907ad44f?js=true',
+ 'only_matching': True,
}]
@staticmethod
def _real_extract(self, url):
video_id = self._match_id(url)
- js = self._download_webpage(
- 'http://playout.3qsdn.com/%s' % video_id, video_id,
- query={'js': 'true'})
-
- if any(p in js for p in (
- '>This content is not available in your country',
- 'playout.3qsdn.com/forbidden')):
- self.raise_geo_restricted()
-
- stream_content = self._search_regex(
- r'streamContent\s*:\s*(["\'])(?P<content>.+?)\1', js,
- 'stream content', default='demand', group='content')
+ try:
+ config = self._download_json(
+ url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ self.raise_geo_restricted()
+ raise
- live = stream_content == 'live'
-
- stream_type = self._search_regex(
- r'streamType\s*:\s*(["\'])(?P<type>audio|video)\1', js,
- 'stream type', default='video', group='type')
+ live = config.get('streamContent') == 'live'
+ aspect = float_or_none(config.get('aspect'))
formats = []
- urls = set()
-
- def extract_formats(item_url, item={}):
- if not item_url or item_url in urls:
- return
- urls.add(item_url)
- ext = mimetype2ext(item.get('type')) or determine_ext(item_url, default_ext=None)
- if ext == 'mpd':
+ for source_type, source in (config.get('sources') or {}).items():
+ if not source:
+ continue
+ if source_type == 'dash':
formats.extend(self._extract_mpd_formats(
- item_url, video_id, mpd_id='mpd', fatal=False))
- elif ext == 'm3u8':
+ source, video_id, mpd_id='mpd', fatal=False))
+ elif source_type == 'hls':
formats.extend(self._extract_m3u8_formats(
- item_url, video_id, 'mp4',
- entry_protocol='m3u8' if live else 'm3u8_native',
+ source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
m3u8_id='hls', fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- item_url, video_id, f4m_id='hds', fatal=False))
- else:
- if not self._is_valid_url(item_url, video_id):
- return
- formats.append({
- 'url': item_url,
- 'format_id': item.get('quality'),
- 'ext': 'mp4' if item_url.startswith('rtsp') else ext,
- 'vcodec': 'none' if stream_type == 'audio' else None,
- })
-
- for item_js in re.findall(r'({[^{]*?\b(?:src|source)\s*:\s*["\'].+?})', js):
- f = self._parse_json(
- item_js, video_id, transform_source=js_to_json, fatal=False)
- if not f:
+ elif source_type == 'progressive':
+ for s in source:
+ src = s.get('src')
+ if not (src and self._is_valid_url(src, video_id)):
+ continue
+ width = None
+ format_id = ['http']
+ ext = determine_ext(src)
+ if ext:
+ format_id.append(ext)
+ height = int_or_none(s.get('height'))
+ if height:
+ format_id.append('%dp' % height)
+ if aspect:
+ width = int(height * aspect)
+ formats.append({
+ 'ext': ext,
+ 'format_id': '-'.join(format_id),
+ 'height': height,
+ 'source_preference': 0,
+ 'url': src,
+ 'vcodec': 'none' if height == 0 else None,
+ 'width': width,
+ })
+ for f in formats:
+ if f.get('acodec') == 'none':
+ f['preference'] = -40
+ elif f.get('vcodec') == 'none':
+ f['preference'] = -50
+ self._sort_formats(formats, ('preference', 'width', 'height', 'source_preference', 'tbr', 'vbr', 'abr', 'ext', 'format_id'))
+
+ subtitles = {}
+ for subtitle in (config.get('subtitles') or []):
+ src = subtitle.get('src')
+ if not src:
continue
- extract_formats(f.get('src'), f)
-
- # More relaxed version to collect additional URLs and acting
- # as a future-proof fallback
- for _, src in re.findall(r'\b(?:src|source)\s*:\s*(["\'])((?:https?|rtsp)://.+?)\1', js):
- extract_formats(src)
-
- self._sort_formats(formats)
+ subtitles.setdefault(subtitle.get('label') or 'eng', []).append({
+ 'url': src,
+ })
- title = self._live_title(video_id) if live else video_id
+ title = config.get('title') or video_id
return {
'id': video_id,
- 'title': title,
+ 'title': self._live_title(title) if live else title,
+ 'thumbnail': config.get('poster') or None,
+ 'description': config.get('description') or None,
+ 'timestamp': parse_iso8601(config.get('upload_date')),
+ 'duration': float_or_none(config.get('vlength')) or None,
'is_live': live,
'formats': formats,
+ 'subtitles': subtitles,
}
)
from ..utils import (
clean_html,
+ dict_get,
ExtractorError,
float_or_none,
int_or_none,
headers = {
'Referer': page_url,
- 'Origin': page_url,
+ 'Origin': 'https://www.twitch.tv',
'Content-Type': 'text/plain;charset=UTF-8',
}
response = self._download_json(
post_url, None, note, data=json.dumps(form).encode(),
headers=headers, expected_status=400)
- error = response.get('error_description') or response.get('error_code')
+ error = dict_get(response, ('error', 'error_description', 'error_code'))
if error:
fail(error)
self._sort_formats(formats)
def _download_base_gql(self, video_id, ops, note, fatal=True):
+ headers = {
+ 'Content-Type': 'text/plain;charset=UTF-8',
+ 'Client-ID': self._CLIENT_ID,
+ }
+ gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
+ if gql_auth:
+ headers['Authorization'] = 'OAuth ' + gql_auth.value
return self._download_json(
'https://gql.twitch.tv/gql', video_id, note,
data=json.dumps(ops).encode(),
- headers={
- 'Content-Type': 'text/plain;charset=UTF-8',
- 'Client-ID': self._CLIENT_ID,
- }, fatal=fatal)
+ headers=headers, fatal=fatal)
def _download_gql(self, video_id, ops, note, fatal=True):
for op in ops:
'uploader_id': '1eVjYOLGkGrQL',
},
'add_ie': ['TwitterBroadcast'],
+ }, {
+ # unified card
+ 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
+ 'info_dict': {
+ 'id': '1349794411333394432',
+ 'ext': 'mp4',
+ 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
+ 'uploader': 'Brooklyn Nets',
+ 'uploader_id': 'BrooklynNets',
+ 'duration': 324.484,
+ 'timestamp': 1610651040,
+ 'upload_date': '20210114',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
# Twitch Clip Embed
'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
# appplayer card
'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
'only_matching': True,
+ }, {
+ # video_direct_message card
+ 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
+ 'only_matching': True,
+ }, {
+ # poll2choice_video card
+ 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
+ 'only_matching': True,
+ }, {
+ # poll3choice_video card
+ 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
+ 'only_matching': True,
+ }, {
+ # poll4choice_video card
+ 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
+ 'only_matching': True,
}]
def _real_extract(self, url):
'tags': tags,
}
- media = try_get(status, lambda x: x['extended_entities']['media'][0])
- if media and media.get('type') != 'photo':
+ def extract_from_video_info(media):
video_info = media.get('video_info') or {}
formats = []
'thumbnails': thumbnails,
'duration': float_or_none(video_info.get('duration_millis'), 1000),
})
+
+ media = try_get(status, lambda x: x['extended_entities']['media'][0])
+ if media and media.get('type') != 'photo':
+ extract_from_video_info(media)
else:
card = status.get('card')
if card:
'_type': 'url',
'url': get_binding_value('card_url'),
})
- # amplify, promo_video_website, promo_video_convo, appplayer, ...
+ elif card_name == 'unified_card':
+ media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
+ extract_from_video_info(next(iter(media_entities.values())))
+ # amplify, promo_video_website, promo_video_convo, appplayer,
+ # video_direct_message, poll2choice_video, poll3choice_video,
+ # poll4choice_video, ...
else:
is_amplify = card_name == 'amplify'
vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
}, {
'url': 'http://www.youporn.com/watch/505835',
'only_matching': True,
+ }, {
+ 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
+ 'only_matching': True,
}]
@staticmethod
# Main source
definitions = self._parse_json(
self._search_regex(
- r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
+ r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
'media definitions', default='[]'),
video_id, fatal=False)
if definitions:
links.append(video_url)
# Fallback #1, this also contains extra low quality 180p format
- for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
+ for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
links.append(link)
# Fallback #2 (unavailable as at 22.06.2017)
# Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+ # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata
- mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+ mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
if mobj:
height = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate'))
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
default='{}'), video_id, fatal=False)
+ def _extract_video(self, renderer):
+ video_id = renderer.get('videoId')
+ title = try_get(
+ renderer,
+ (lambda x: x['title']['runs'][0]['text'],
+ lambda x: x['title']['simpleText']), compat_str)
+ description = try_get(
+ renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
+ compat_str)
+ duration = parse_duration(try_get(
+ renderer, lambda x: x['lengthText']['simpleText'], compat_str))
+ view_count_text = try_get(
+ renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+ view_count = str_to_int(self._search_regex(
+ r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
+ 'view count', default=None))
+ uploader = try_get(
+ renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': video_id,
+ 'url': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'uploader': uploader,
+ }
+
class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com'
if renderer:
return renderer
- def _extract_video(self, renderer):
- video_id = renderer.get('videoId')
- title = try_get(
- renderer,
- (lambda x: x['title']['runs'][0]['text'],
- lambda x: x['title']['simpleText']), compat_str)
- description = try_get(
- renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
- compat_str)
- duration = parse_duration(try_get(
- renderer, lambda x: x['lengthText']['simpleText'], compat_str))
- view_count_text = try_get(
- renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
- view_count = str_to_int(self._search_regex(
- r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
- 'view count', default=None))
- uploader = try_get(
- renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
- return {
- '_type': 'url_transparent',
- 'ie_key': YoutubeIE.ie_key(),
- 'id': video_id,
- 'url': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'view_count': view_count,
- 'uploader': uploader,
- }
-
def _grid_entries(self, grid_renderer):
for item in grid_renderer['items']:
if not isinstance(item, dict):
if not slr_contents:
break
- isr_contents = []
- continuation_token = None
# Youtube sometimes adds promoted content to searches,
# changing the index location of videos and token.
# So we search through all entries till we find them.
- for index, isr in enumerate(slr_contents):
+ continuation_token = None
+ for slr_content in slr_contents:
+ isr_contents = try_get(
+ slr_content,
+ lambda x: x['itemSectionRenderer']['contents'],
+ list)
if not isr_contents:
- isr_contents = try_get(
- slr_contents,
- (lambda x: x[index]['itemSectionRenderer']['contents']),
- list)
- for content in isr_contents:
- if content.get('videoRenderer') is not None:
- break
- else:
- isr_contents = []
+ continue
+ for content in isr_contents:
+ if not isinstance(content, dict):
+ continue
+ video = content.get('videoRenderer')
+ if not isinstance(video, dict):
+ continue
+ video_id = video.get('videoId')
+ if not video_id:
+ continue
+
+ yield self._extract_video(video)
+ total += 1
+ if total == n:
+ return
if continuation_token is None:
continuation_token = try_get(
- slr_contents,
- lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
- 'token'],
+ slr_content,
+ lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
compat_str)
- if continuation_token is not None and isr_contents:
- break
- if not isr_contents:
- break
- for content in isr_contents:
- if not isinstance(content, dict):
- continue
- video = content.get('videoRenderer')
- if not isinstance(video, dict):
- continue
- video_id = video.get('videoId')
- if not video_id:
- continue
- title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
- description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
- duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
- view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
- view_count = str_to_int(self._search_regex(
- r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
- 'view count', default=None))
- uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
- total += 1
- yield {
- '_type': 'url_transparent',
- 'ie_key': YoutubeIE.ie_key(),
- 'id': video_id,
- 'url': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'view_count': view_count,
- 'uploader': uploader,
- }
- if total == n:
- return
if not continuation_token:
break
data['continuation'] = continuation_token