[extractor/youtube] Extract DRC formats

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 7e3530c0fa24c377bd7b45cc706259776b10f082..506bd1e19abbe4b576596454cf4deb9e74bdca2f 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -388,6 +388,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
          r'(?:www\.)?piped\.adminforge\.de',
          r'(?:www\.)?watch\.whatevertinfoil\.de',
          r'(?:www\.)?piped\.qdi\.fi',
+        r'(?:www\.)?piped\.video',
+        r'(?:www\.)?piped\.aeong\.one',
      )
  
      # extracted from account/account_menu ep
@@ -2542,6 +2544,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'tags': [],
              },
              'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
+        }, {
+            'note': 'Audio formats with Dynamic Range Compression',
+            'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
+            'info_dict': {
+                'id': 'Tq92D6wQ1mg',
+                'ext': 'weba',
+                'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
+                'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'channel_follower_count': int,
+                'description': 'md5:17eccca93a786d51bc67646756894066',
+                'upload_date': '20191228',
+                'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
+                'playable_in_embed': True,
+                'like_count': int,
+                'categories': ['Entertainment'],
+                'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
+                'age_limit': 18,
+                'channel': 'Projekt Melody',
+                'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
+                'view_count': int,
+                'availability': 'needs_auth',
+                'comment_count': int,
+                'live_status': 'not_live',
+                'uploader': 'Projekt Melody',
+                'duration': 106,
+            },
+            'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
          }
      ]
  
@@ -3551,7 +3582,7 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
  
              itag = str_or_none(fmt.get('itag'))
              audio_track = fmt.get('audioTrack') or {}
-            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
+            stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
              if stream_id in stream_ids:
                  continue
  
@@ -3632,11 +3663,12 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
              dct = {
                  'asr': int_or_none(fmt.get('audioSampleRate')),
                  'filesize': int_or_none(fmt.get('contentLength')),
-                'format_id': itag,
+                'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
                  'format_note': join_nonempty(
                      '%s%s' % (audio_track.get('displayName') or '',
                                ' (default)' if language_preference > 0 else ''),
                      fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
+                    'DRC' if fmt.get('isDrc') else None,
                      try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                      try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                      throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
@@ -3645,7 +3677,7 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
                  'fps': int_or_none(fmt.get('fps')) or None,
                  'audio_channels': fmt.get('audioChannels'),
                  'height': height,
-                'quality': q(quality),
+                'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
                  'has_drm': bool(fmt.get('drmFamilies')),
                  'tbr': tbr,
                  'url': fmt_url,
@@ -4003,10 +4035,6 @@ def is_bad_format(fmt):
  
          formats.extend(self._extract_storyboard(player_responses, duration))
  
-        # source_preference is lower for throttled/potentially damaged formats
-        self._sort_formats(formats, (
-            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
-
          info = {
              'id': video_id,
              'title': video_title,
@@ -4036,6 +4064,8 @@ def is_bad_format(fmt):
              'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
              'live_status': live_status,
              'release_timestamp': live_start_time,
+            '_format_sort_fields': (  # source_preference is lower for throttled/potentially damaged formats
+                'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
          }
  
          subtitles = {}
@@ -4085,7 +4115,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
                      if not trans_code:
                          continue
                      orig_trans_code = trans_code
-                    if caption_track.get('kind') != 'asr':
+                    if caption_track.get('kind') != 'asr' and trans_code != 'und':
                          if not get_translated_subs:
                              continue
                          trans_code += f'-{lang_code}'
@@ -4382,6 +4412,25 @@ def _extract_basic_item_renderer(item):
              elif key.startswith('grid') and key.endswith('Renderer'):
                  return renderer
  
+    def _extract_channel_renderer(self, renderer):  # Build a 'url'-type result dict describing the channel in `renderer`
+        channel_id = renderer['channelId']  # Direct lookup (KeyError on a dict without it); _grid_entries only calls this once channelId is known to be set
+        title = self._get_text(renderer, 'title')
+        channel_url = f'https://www.youtube.com/channel/{channel_id}'  # Reused below for both 'url' and 'channel_url'
+        return {
+            '_type': 'url',  # Entry is a URL reference plus metadata, not extracted media
+            'url': channel_url,
+            'id': channel_id,
+            'ie_key': YoutubeTabIE.ie_key(),  # Names YoutubeTabIE as the extractor for `url`
+            'channel': title,
+            'channel_id': channel_id,
+            'channel_url': channel_url,
+            'title': title,  # The renderer's title text serves as both entry title and channel name
+            'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),  # Read from the subscriber-count text
+            'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
+            'playlist_count': self._get_count(renderer, 'videoCountText'),  # Read from the video-count text; presumably the channel's number of videos
+            'description': self._get_text(renderer, 'descriptionSnippet'),
+        }
+
      def _grid_entries(self, grid_renderer):
          for item in grid_renderer['items']:
              if not isinstance(item, dict):
@@ -4407,9 +4456,7 @@ def _grid_entries(self, grid_renderer):
              # channel
              channel_id = renderer.get('channelId')
              if channel_id:
-                yield self.url_result(
-                    'https://www.youtube.com/channel/%s' % channel_id,
-                    ie=YoutubeTabIE.ie_key(), video_title=title)
+                yield self._extract_channel_renderer(renderer)
                  continue
              # generic endpoint URL support
              ep_url = urljoin('https://www.youtube.com/', try_get(
@@ -5762,7 +5809,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader': 'cole-dlp-test-acc',
              'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
              'channel': 'cole-dlp-test-acc',
-            'channel_follower_count': int,
          },
          'playlist_mincount': 1,
          'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
@@ -5930,7 +5976,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'title': 'cole-dlp-test-acc - Shorts',
              'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
              'channel': 'cole-dlp-test-acc',
-            'channel_follower_count': int,
              'description': 'test description',
              'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
              'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
@@ -5976,8 +6021,40 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                  'channel': str,
              }
          }],
-        'params': {'extract_flat': True},
+        'params': {'extract_flat': True, 'playlist_items': '1'},
          'playlist_mincount': 1
+    }, {
+        # Channel renderer metadata. Contains number of videos on the channel
+        'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
+        'info_dict': {
+            'id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'title': 'cole-dlp-test-acc - Channels',
+            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'channel': 'cole-dlp-test-acc',
+            'description': 'test description',
+            'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'tags': [],
+            'uploader': 'cole-dlp-test-acc',
+            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+
+        },
+        'playlist': [{
+            'info_dict': {
+                '_type': 'url',
+                'ie_key': 'YoutubeTab',
+                'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+                'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+                'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+                'title': 'PewDiePie',
+                'channel': 'PewDiePie',
+                'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+                'thumbnails': list,
+                'channel_follower_count': int,
+                'playlist_count': int
+            }
+        }],
+        'params': {'extract_flat': True},
      }]
  
      @classmethod
@@ -6531,6 +6608,30 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
              #     'title': '#cats',
              # }],
          },
+    }, {
+        # Channel results
+        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
+        'info_dict': {
+            'id': 'kurzgesagt',
+            'title': 'kurzgesagt',
+        },
+        'playlist': [{
+            'info_dict': {
+                '_type': 'url',
+                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
+                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
+                'ie_key': 'YoutubeTab',
+                'channel': 'Kurzgesagt – In a Nutshell',
+                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
+                'title': 'Kurzgesagt – In a Nutshell',
+                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
+                'playlist_count': int,  # XXX: should have a way of saying > 1
+                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
+                'thumbnails': list
+            }
+        }],
+        'params': {'extract_flat': True, 'playlist_items': '1'},
+        'playlist_mincount': 1,
      }, {
          'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
          'only_matching': True,