[youtube] Detect DRM better

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index d74d5b0e9d327a2aec5aebe4846a2df136b1fab8..bd3a7d36bbe778a4861b6d2c115bb8fd478b6896 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -818,12 +818,17 @@ def _extract_video(self, renderer):
              renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
          badges = self._extract_badges(renderer)
          thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
+        navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
+            renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str))
+        url = f'https://www.youtube.com/watch?v={video_id}'
+        if overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url):
+            url = f'https://www.youtube.com/shorts/{video_id}'
  
          return {
              '_type': 'url',
              'ie_key': YoutubeIE.ie_key(),
              'id': video_id,
-            'url': f'https://www.youtube.com/watch?v={video_id}',
+            'url': url,
              'title': title,
              'description': description,
              'duration': duration,
@@ -2940,13 +2945,18 @@ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
                  webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                  video_id, 'initial player response')
  
-        original_clients = clients
+        all_clients = set(clients)
          clients = clients[::-1]
          prs = []
  
-        def append_client(client_name):
-            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
-                clients.append(client_name)
+        def append_client(*client_names):
+            """ Append the first client name that exists in INNERTUBE_CLIENTS, unless it is already in all_clients """
+            for client_name in client_names:
+                if client_name in INNERTUBE_CLIENTS:
+                    if client_name not in all_clients:
+                        clients.append(client_name)
+                        all_clients.add(client_name)
+                    return  # stop at the first known client, even if nothing was appended
  
          # Android player_response does not have microFormats which are needed for
          # extraction of some data. So we return the initial_pr with formats
@@ -2992,7 +3002,7 @@ def append_client(client_name):
              if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                  append_client(client.replace('_agegate', '_creator'))
              elif self._is_agegated(pr):
-                append_client(f'{client}_agegate')
+                append_client(f'{client}_embedded', f'{client.replace("_embedded", "")}_agegate')
  
          if last_error:
              if not len(prs):
@@ -3013,7 +3023,7 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live, durati
          streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
  
          for fmt in streaming_formats:
-            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
+            if fmt.get('targetDurationSec'):
                  continue
  
              itag = str_or_none(fmt.get('itag'))
@@ -3095,6 +3105,7 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live, durati
                  'fps': int_or_none(fmt.get('fps')) or None,
                  'height': height,
                  'quality': q(quality),
+                'has_drm': bool(fmt.get('drmFamilies')),
                  'tbr': tbr,
                  'url': fmt_url,
                  'width': int_or_none(fmt.get('width')),