class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Base extractor for YouTube pages that list entries and paginate via
    InnerTube "continuation" requests.

    Subclasses must define:
      * ``_is_entry(obj)`` -- predicate identifying an entry dict,
      * ``_process_entries(entries, seen)`` -- turns raw entry dicts into
        results (see sibling classes in this file),
      * ``_YTCFG_DATA_RE`` / ``_INITIAL_DATA_RE`` -- regexes locating the
        ytcfg blobs and the ``ytInitialData`` JSON in the page HTML.
    """

    def _find_entries_in_json(self, extracted):
        """Recursively walk a parsed JSON tree collecting entry dicts.

        Returns a tuple ``(entries, continuation)`` where ``entries`` is the
        list of dicts for which ``self._is_entry()`` is true and
        ``continuation`` is the last dict found containing a
        'continuationCommand' key (or None if there was none).
        """
        entries = []
        # Mutable cell so the nested closure can record the continuation dict.
        c = {}

        def _real_find(obj):
            # None and strings are leaves -- nothing to descend into.
            if obj is None or isinstance(obj, str):
                return

            if type(obj) is list:
                for elem in obj:
                    _real_find(elem)

            if type(obj) is dict:
                # An entry is a terminal match: do not descend further.
                if self._is_entry(obj):
                    entries.append(obj)
                    return

                # A continuation command is also terminal; remember it for
                # the caller so the next page can be requested.
                if 'continuationCommand' in obj:
                    c['continuation'] = obj
                    return

                # Otherwise keep searching every value of the dict.
                for _, o in obj.items():
                    _real_find(o)

        _real_find(extracted)

        return entries, try_get(c, lambda x: x["continuation"])

    def _entries(self, page, playlist_id, n=1):
        """Yield entries scraped from *page*, following up to *n* pages.

        page -- HTML of the first result page.
        playlist_id -- identifier used only for logging/error messages.
        n -- maximum number of pages (first page + continuations) to fetch.
        """
        # Cross-page dedup state, threaded through _process_entries.
        seen = []

        # Merge every ytcfg blob on the page; it supplies the API key and
        # client context required by continuation requests below.
        yt_conf = {}
        for m in re.finditer(self._YTCFG_DATA_RE, page):
            parsed = self._parse_json(m.group(1), playlist_id,
                                      transform_source=js_to_json, fatal=False)
            if parsed:
                yt_conf.update(parsed)

        data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)

        # for page_num in itertools.count(1):
        for page_num in range(n):
            entries, continuation = self._find_entries_in_json(data_json)
            processed = self._process_entries(entries, seen)

            # NOTE(review): when _process_entries is a generator (as in the
            # subclasses here), this object is always truthy, so the break
            # below can never trigger -- confirm whether that is intended.
            if not processed:
                break
            for entry in processed:
                yield entry

            # Without a continuation dict and ytcfg data we cannot build the
            # next-page request.
            if not continuation or not yt_conf:
                break
            continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
            continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
            if not continuation_token or not continuation_url:
                break

            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    data_json = self._download_json(
                        'https://www.youtube.com%s' % continuation_url,
                        playlist_id,
                        'Downloading page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
                        transform_source=uppercase_escape,
                        query={
                            'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
                        },
                        data=bytes(json.dumps({
                            'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
                            'continuation': continuation_token
                        }), encoding='utf-8'),
                        headers={
                            'Content-Type': 'application/json'
                        }
                    )
                    break
                except ExtractorError as e:
                    # Retry only transient server errors (500/503); anything
                    # else, or exhausting the retry budget, re-raises.
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise

    def _extract_title(self, renderer):
        """Return the renderer's title from 'runs' or 'simpleText', or None."""
        title = try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        if title:
            return title
        return try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
-
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Entry-list extractor specialized for playlist-style pages, where an
    entry is any renderer dict carrying a 'videoId'."""

    def _is_entry(self, obj):
        # A playlist entry is a renderer that names a concrete video.
        return 'videoId' in obj

    def _process_entries(self, entries, seen):
        """Yield url_result dicts for the videos in *entries*.

        Duplicate video ids keep their first position; a later occurrence
        may only backfill a title that was empty the first time around.
        """
        ordered_ids = []
        page_titles = []
        for renderer in entries:
            vid = try_get(renderer, lambda x: x['videoId'])
            title = self._extract_title(renderer)

            # Skip anything that is not a videoRenderer or whose title
            # extraction failed.
            if vid is None or title is None:
                continue

            title = title.strip()

            if vid in ordered_ids:
                # Already recorded: only fill in a previously empty title.
                pos = ordered_ids.index(vid)
                if title and not page_titles[pos]:
                    page_titles[pos] = title
            else:
                ordered_ids.append(vid)
                page_titles.append(title)

        for vid, title in zip(ordered_ids, page_titles):
            yield self.url_result(vid, 'Youtube', vid, title)