def _extract_subtitles_from_rendition(self, rendition, subtitles, parsed_urls):
    """Collect the closed-caption files of one rendition into ``subtitles``.

    Each item of ``rendition['ccFiles']`` is read positionally: index 2 is
    taken as the caption URL, index 1 as the caption name and index 0 as the
    fallback key when the name is unusable.

    Args:
        rendition: Mapping that may carry a ``'ccFiles'`` list.
        subtitles: Dict mapping a language key to a list of ``{'url': ...}``
            entries; updated in place.
        parsed_urls: Set of caption URLs already recorded; updated in place
            so the same file is not added twice.
    """
    for cc_file in rendition.get('ccFiles') or []:
        cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))
        # A missing or rejected URL must neither create a {'url': None}
        # entry nor be remembered in parsed_urls.
        if not cc_url or cc_url in parsed_urls:
            continue
        # name is used since we can't distinguish subs with same language code
        cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)
        if not cc_lang:
            continue
        parsed_urls.add(cc_url)
        subtitles.setdefault(cc_lang, []).append({'url': cc_url})
-
def _get_subtitles(self, url, video_id, title, key, subtitles, parsed_urls):
    """Add the subtitles referenced by ``data-captions`` attributes of a page.

    The page at ``url`` is downloaded non-fatally; for every distinct
    ``data-captions`` value found in it, the API is queried with
    ``{'Captions': value}`` and each returned rendition is passed to
    ``_extract_subtitles_from_rendition``.

    Args:
        url: Page to scan for ``data-captions`` attributes.
        video_id, title, key: Forwarded unchanged to ``_call_api``.
        subtitles: Dict of already known subtitles; updated in place.
        parsed_urls: Set of caption URLs already recorded; updated in place.

    Returns:
        The same ``subtitles`` dict that was passed in.
    """
    webpage = self._download_webpage(url, video_id, fatal=False) or ''
    # dict.fromkeys() removes duplicates like set() did, but keeps page order
    # so the resulting subtitle lists are ordered the same on every run.
    for caption in dict.fromkeys(re.findall(r'data-captions=\"([^\"]+)\"', webpage)):
        response = self._call_api(
            video_id, title, key, {'Captions': caption}, fatal=False,
            note=f'Downloading {caption} subtitle information')
        # NOTE(review): presumably a non-fatal failure yields a falsy value
        # instead of a dict - confirm against _call_api. Guarding here keeps
        # one failed caption request from aborting the whole extraction.
        renditions = (response or {}).get('renditions') or {}
        for rendition in renditions.values():
            self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls)
    return subtitles
-