jfr.im git - yt-dlp.git/commitdiff
[YouTube] Age-gate bypass implementation (#575)
author MinePlayersPE <redacted>
Tue, 27 Jul 2021 09:40:44 +0000 (16:40 +0700)
committer GitHub <redacted>
Tue, 27 Jul 2021 09:40:44 +0000 (15:10 +0530)
* Calling the API with `clientScreen=EMBED` allows access to most age-gated videos - discovered by @ccdffddfddfdsfedeee (https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136)
* Adds clients: (web/android/ios)_(embedded/agegate), mweb_agegate
* Renamed mobile_web to mweb

Closes #574

Authored by pukkandan, MinePlayersPE

README.md
yt_dlp/extractor/youtube.py

index 3d9edf5906011c6529971fdd5e23fb89d1999615..4a8364e57e31fec833354518ba07e7d34ba8fbf5 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1354,7 +1354,7 @@ # EXTRACTOR ARGUMENTS
 The following extractors use this feature:
 * **youtube**
     * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
-    * `player_client`: Clients to extract video data from - one or more of `web`, `android`, `ios`, `mobile_web`, `web_music`, `android_music`, `ios_music` or `all`. By default, `android,web` is used. If the URL is from `music.youtube.com`, `android,web,android_music,web_music` is used
+    * `player_client`: Clients to extract video data from - one or more of `web`, `android`, `ios`, `mweb`, `web_music`, `android_music`, `ios_music`, `web_embedded`, `android_embedded`, `ios_embedded`, `web_agegate`, `android_agegate`, `ios_agegate`, `mweb_agegate` or `all`. By default, `android,web` is used. If the URL is from `music.youtube.com`, `android,web,android_music,web_music` is used. If age-gate is detected, the `_agegate` variants are automatically added.
     * `player_skip`: `configs` - skip any requests for client configs and use defaults
     * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
     * `max_comments`: maximum amount of comments to download (default all).
index 153cb299782e49be76df95b4969267ed9f8993b4..47e3c2f44d3b7e5f35f34ce2488540c19520cd49 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -327,6 +327,21 @@ def _real_initialize(self):
             },
             'INNERTUBE_CONTEXT_CLIENT_NAME': 1
         },
+        'WEB_AGEGATE': {
+            'INNERTUBE_API_VERSION': 'v1',
+            'INNERTUBE_CLIENT_NAME': 'WEB',
+            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
+            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'WEB',
+                    'clientVersion': '2.20210622.10.00',
+                    'clientScreen': 'EMBED',
+                    'hl': 'en',
+                }
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
+        },
         'WEB_REMIX': {
             'INNERTUBE_API_VERSION': 'v1',
             'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
@@ -369,6 +384,21 @@ def _real_initialize(self):
             },
             'INNERTUBE_CONTEXT_CLIENT_NAME': 3
         },
+        'ANDROID_AGEGATE': {
+            'INNERTUBE_API_VERSION': 'v1',
+            'INNERTUBE_CLIENT_NAME': 'ANDROID',
+            'INNERTUBE_CLIENT_VERSION': '16.20',
+            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'ANDROID',
+                    'clientVersion': '16.20',
+                    'clientScreen': 'EMBED',
+                    'hl': 'en',
+                }
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
+        },
         'ANDROID_EMBEDDED_PLAYER': {
             'INNERTUBE_API_VERSION': 'v1',
             'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
@@ -410,7 +440,21 @@ def _real_initialize(self):
                 }
             },
             'INNERTUBE_CONTEXT_CLIENT_NAME': 5
-
+        },
+        'IOS_AGEGATE': {
+            'INNERTUBE_API_VERSION': 'v1',
+            'INNERTUBE_CLIENT_NAME': 'IOS',
+            'INNERTUBE_CLIENT_VERSION': '16.20',
+            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'IOS',
+                    'clientVersion': '16.20',
+                    'clientScreen': 'EMBED',
+                    'hl': 'en',
+                }
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 5
         },
         'IOS_MUSIC': {
             'INNERTUBE_API_VERSION': 'v1',
@@ -454,6 +498,21 @@ def _real_initialize(self):
             },
             'INNERTUBE_CONTEXT_CLIENT_NAME': 2
         },
+        'MWEB_AGEGATE': {
+            'INNERTUBE_API_VERSION': 'v1',
+            'INNERTUBE_CLIENT_NAME': 'MWEB',
+            'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
+            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'MWEB',
+                    'clientVersion': '2.20210721.07.00',
+                    'clientScreen': 'EMBED',
+                    'hl': 'en',
+                }
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 2
+        },
     }
 
     _YT_DEFAULT_INNERTUBE_HOSTS = {
@@ -467,17 +526,18 @@ def _real_initialize(self):
     _YT_CLIENTS = {
         'android': 'ANDROID',
         'android_music': 'ANDROID_MUSIC',
-        '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
-        '_android_agegate': 'ANDROID',
+        'android_embedded': 'ANDROID_EMBEDDED_PLAYER',
+        'android_agegate': 'ANDROID_AGEGATE',
         'ios': 'IOS',
         'ios_music': 'IOS_MUSIC',
-        '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
-        '_ios_agegate': 'IOS',
+        'ios_embedded': 'IOS_MESSAGES_EXTENSION',
+        'ios_agegate': 'IOS_AGEGATE',
         'web': 'WEB',
         'web_music': 'WEB_REMIX',
-        '_web_embedded': 'WEB_EMBEDDED_PLAYER',
-        '_web_agegate': 'TVHTML5',
-        'mobile_web': 'MWEB',
+        'web_embedded': 'WEB_EMBEDDED_PLAYER',
+        'web_agegate': 'WEB_AGEGATE',
+        'mweb': 'MWEB',
+        'mweb_agegate': 'MWEB_AGEGATE',
     }
 
     def _get_default_ytcfg(self, client='WEB'):
@@ -2366,30 +2426,6 @@ def _generate_player_context(sts=None):
             'racyCheckOk': True
         }
 
-    @staticmethod
-    def _get_video_info_params(video_id, client='TVHTML5'):
-        GVI_CLIENTS = {
-            'ANDROID': {
-                'c': 'ANDROID',
-                'cver': '16.20',
-            },
-            'TVHTML5': {
-                'c': 'TVHTML5',
-                'cver': '6.20180913',
-            },
-            'IOS': {
-                'c': 'IOS',
-                'cver': '16.20'
-            }
-        }
-        query = {
-            'video_id': video_id,
-            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
-            'html5': '1'
-        }
-        query.update(GVI_CLIENTS.get(client))
-        return query
-
     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
 
         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
@@ -2408,42 +2444,6 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
         ) or None
 
-    def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
-        # get_video_info endpoint seems to be completely dead
-        gvi_client = None  # self._YT_CLIENTS.get(f'_{client}_agegate')
-        if gvi_client:
-            pr = self._parse_json(traverse_obj(
-                compat_parse_qs(self._download_webpage(
-                    self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
-                    'Refetching age-gated %s info webpage' % gvi_client.lower(),
-                    'unable to download video info webpage', fatal=False,
-                    query=self._get_video_info_params(video_id, client=gvi_client))),
-                ('player_response', 0), expected_type=str) or '{}', video_id)
-            if pr:
-                return pr
-            self.report_warning('Falling back to embedded-only age-gate workaround')
-
-        if not self._YT_CLIENTS.get(f'_{client}_embedded'):
-            return
-        embed_webpage = None
-        if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
-            embed_webpage = self._download_webpage(
-                'https://www.youtube.com/embed/%s?html5=1' % video_id,
-                video_id=video_id, note=f'Downloading age-gated {client} embed config')
-
-        ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
-        # If we extracted the embed webpage, it'll tell us if we can view the video
-        embedded_pr = self._parse_json(
-            traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
-            video_id=video_id)
-        embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
-        if embedded_ps_reason in self._AGE_GATE_REASONS:
-            return
-        return self._extract_player_response(
-            f'_{client}_embedded', video_id,
-            ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
-            identity_token, player_url, initial_pr)
-
     def _get_requested_clients(self, url, smuggled_data):
         requested_clients = []
         allowed_clients = [client for client in self._YT_CLIENTS.keys() if client[:1] != '_']
@@ -2463,6 +2463,16 @@ def _get_requested_clients(self, url, smuggled_data):
 
         return orderedSet(requested_clients)
 
+    def _extract_player_ytcfg(self, client, video_id):
+        url = {
+            'web_music': 'https://music.youtube.com',
+            'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
+        }.get(client)
+        if not url:
+            return {}
+        webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
+        return self.extract_ytcfg(video_id, webpage) or {}
+
     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
         initial_pr = None
         if webpage:
@@ -2470,30 +2480,40 @@ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, pl
                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                 video_id, 'initial player response')
 
-        for client in clients:
+        original_clients = clients
+        clients = clients[::-1]
+        while clients:
+            client = clients.pop()
             player_ytcfg = master_ytcfg if client == 'web' else {}
-            if client == 'web' and initial_pr:
-                pr = initial_pr
-            else:
-                if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
-                    ytm_webpage = self._download_webpage(
-                        'https://music.youtube.com',
-                        video_id, fatal=False, note='Downloading remix client config')
-                    player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
-                pr = self._extract_player_response(
-                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
+            if 'configs' not in self._configuration_arg('player_skip'):
+                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
+                if client == 'web_embedded':
+                    # If we extracted the embed webpage, it'll tell us if we can view the video
+                    embedded_pr = self._parse_json(
+                        traverse_obj(player_ytcfg, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
+                        video_id=video_id)
+                    embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
+                    if embedded_ps_reason in self._AGE_GATE_REASONS:
+                        self.report_warning(f'Youtube said: {embedded_ps_reason}')
+                        continue
+
+            pr = (
+                initial_pr if client == 'web' and initial_pr
+                else self._extract_player_response(
+                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
             if pr:
                 yield pr
+
             if traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
-                pr = self._extract_age_gated_player_response(
-                    client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
-                if pr:
-                    yield pr
+                client = f'{client}_agegate'
+                if client in self._YT_CLIENTS and client not in original_clients:
+                    clients.append(client)
+
         # Android player_response does not have microFormats which are needed for
         # extraction of some data. So we return the initial_pr with formats
         # stripped out even if not requested by the user
         # See: https://github.com/yt-dlp/yt-dlp/issues/501
-        if initial_pr and 'web' not in clients:
+        if initial_pr and 'web' not in original_clients:
             initial_pr['streamingData'] = None
             yield initial_pr