jfr.im git - yt-dlp.git/commitdiff
[youtube] Fix subtitles only being extracted from the first client
author: pukkandan <redacted>
Fri, 23 Jul 2021 04:07:15 +0000 (09:37 +0530)
committer: pukkandan <redacted>
Fri, 23 Jul 2021 04:16:55 +0000 (09:46 +0530)
Closes #547

yt_dlp/extractor/youtube.py

index 23cf828340a5eead975d88b320b82c26c608aef0..9eb1035204825f9557b5f5e68c6850880468bdc7 100644 (file)
@@ -2845,7 +2845,14 @@ def feed_entry(name):
             'release_timestamp': live_starttime,
         }
 
-        pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
+        pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
+        # Converted into dicts to remove duplicates
+        captions = {
+            sub.get('baseUrl'): sub
+            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
+        translation_languages = {
+            lang.get('languageCode'): lang.get('languageName')
+            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
         subtitles = {}
         if pctr:
             def process_language(container, base_url, lang_code, sub_name, query):
@@ -2860,8 +2867,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
                         'name': sub_name,
                     })
 
-            for caption_track in (pctr.get('captionTracks') or []):
-                base_url = caption_track.get('baseUrl')
+            for base_url, caption_track in captions.items():
                 if not base_url:
                     continue
                 if caption_track.get('kind') != 'asr':
@@ -2872,18 +2878,17 @@ def process_language(container, base_url, lang_code, sub_name, query):
                         continue
                     process_language(
                         subtitles, base_url, lang_code,
-                        try_get(caption_track, lambda x: x['name']['simpleText']),
+                        traverse_obj(caption_track, ('name', 'simpleText')),
                         {})
                     continue
                 automatic_captions = {}
-                for translation_language in (pctr.get('translationLanguages') or []):
-                    translation_language_code = translation_language.get('languageCode')
-                    if not translation_language_code:
+                for trans_code, trans_name in translation_languages.items():
+                    if not trans_code:
                         continue
                     process_language(
-                        automatic_captions, base_url, translation_language_code,
-                        self._get_text(translation_language.get('languageName'), max_runs=1),
-                        {'tlang': translation_language_code})
+                        automatic_captions, base_url, trans_code,
+                        self._get_text(trans_name, max_runs=1),
+                        {'tlang': trans_code})
                 info['automatic_captions'] = automatic_captions
         info['subtitles'] = subtitles