]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/tennistv.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / tennistv.py
index 5e3eb89380b09338f795683f979f205de9514fa3..197d7892d142d8d94344b9b9ec360f3939ab06b2 100644 (file)
@@ -1,19 +1,17 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
+import urllib.parse
 
 from .common import InfoExtractor
-
 from ..utils import (
     ExtractorError,
+    random_uuidv4,
     unified_timestamp,
+    urlencode_postdata,
 )
 
 
 class TennisTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
         'info_dict': {
             'id': 'indian-wells-2018-verdasco-fritz',
@@ -28,86 +26,130 @@ class TennisTVIE(InfoExtractor):
             'skip_download': True,
         },
         'skip': 'Requires email and password of a subscribed account',
-    }
+    }, {
+        'url': 'https://www.tennistv.com/videos/2650480/best-matches-of-2022-part-5',
+        'info_dict': {
+            'id': '2650480',
+            'ext': 'mp4',
+            'title': 'Best Matches of 2022 - Part 5',
+            'description': 'md5:36dec3bfae7ed74bd79e48045b17264c',
+            'thumbnail': 'https://open.http.mp.streamamg.com/p/3001482/sp/300148200/thumbnail/entry_id/0_myef18pd/version/100001/height/1920',
+        },
+        'params': {'skip_download': 'm3u8'},
+        'skip': 'Requires email and password of a subscribed account',
+    }]
     _NETRC_MACHINE = 'tennistv'
 
-    def _login(self):
-        username, password = self._get_login_info()
-        if not username or not password:
-            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
-
-        login_form = {
-            'Email': username,
-            'Password': password,
-        }
-        login_json = json.dumps(login_form).encode('utf-8')
-        headers = {
-            'content-type': 'application/json',
-            'Referer': 'https://www.tennistv.com/login',
-            'Origin': 'https://www.tennistv.com',
-        }
-
-        login_result = self._download_json(
-            'https://www.tennistv.com/api/users/v1/login', None,
-            note='Logging in',
-            errnote='Login failed (wrong password?)',
-            headers=headers,
-            data=login_json)
+    access_token, refresh_token = None, None  # set by get_token() or from cookies in _real_initialize()
+    _PARTNER_ID = 3001482  # fills {partner} in _FORMAT_URL and the thumbnail URL
+    _FORMAT_URL = 'https://open.http.mp.streamamg.com/p/{partner}/sp/{partner}00/playManifest/entryId/{entry}/format/applehttp/protocol/https/a.m3u8?ks={session}'  # m3u8 manifest template; {session} receives the KSession value
+    _AUTH_BASE_URL = 'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect'  # base for the /auth and /token endpoints
+    _HEADERS = {  # shared by the credential POST, silent auth, token and ksession requests
+        'origin': 'https://www.tennistv.com',
+        'referer': 'https://www.tennistv.com/',
+        'content-Type': 'application/x-www-form-urlencoded',
+    }
 
-        if login_result['error']['errorCode']:
-            raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, login_result['error']['errorMessage']))
+    def _perform_login(self, username, password):  # log in through the SSO form, then trade the resulting code for tokens
+        login_page = self._download_webpage(  # step 1: fetch the page that holds the login form
+            f'{self._AUTH_BASE_URL}/auth', None, 'Downloading login page',
+            query={
+                'client_id': 'tennis-tv-web',
+                'redirect_uri': 'https://tennistv.com',
+                'response_mode': 'fragment',
+                'response_type': 'code',
+                'scope': 'openid',
+            })
+
+        post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url')  # the form's action attribute is where the credentials go
+        temp_page = self._download_webpage(  # step 2: submit the credentials
+            post_url, None, 'Sending login data', 'Unable to send login data',
+            headers=self._HEADERS, data=urlencode_postdata({
+                'username': username,
+                'password': password,
+                'submitAction': 'Log In',
+            }))
+        if 'Your username or password was incorrect' in temp_page:  # failure is detected by this text in the response body
+            raise ExtractorError('Your username or password was incorrect', expected=True)
+
+        handle = self._request_webpage(  # step 3: repeat the auth request with prompt=none; only the final URL of the response is used
+            f'{self._AUTH_BASE_URL}/auth', None, 'Logging in', headers=self._HEADERS,
+            query={
+                'client_id': 'tennis-tv-web',
+                'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html',
+                'state': random_uuidv4(),
+                'response_mode': 'fragment',
+                'response_type': 'code',
+                'scope': 'openid',
+                'nonce': random_uuidv4(),
+                'prompt': 'none',
+            })
+
+        self.get_token(None, {  # step 4: exchange the authorization code for tokens
+            'code': urllib.parse.parse_qs(handle.url)['code'][-1],  # NOTE(review): parse_qs runs on the whole URL, so 'code' is missed if it is the first parameter - confirm ordering
+            'grant_type': 'authorization_code',
+            'client_id': 'tennis-tv-web',
+            'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html',
+        })
+
+    def get_token(self, video_id, payload):  # POST payload to the token endpoint and store the returned tokens on self
+        res = self._download_json(
+            f'{self._AUTH_BASE_URL}/token', video_id, 'Fetching tokens',
+            'Unable to fetch tokens', headers=self._HEADERS, data=urlencode_postdata(payload))  # payload is sent form-encoded
+
+        self.access_token = res.get('access_token') or self.access_token  # keep the previous value when the response omits it
+        self.refresh_token = res.get('refresh_token') or self.refresh_token  # same fallback for the refresh token
 
-        if login_result['entitlement'] != 'SUBSCRIBED':
-            self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME))
+    def _real_initialize(self):  # make sure both tokens are available before extraction
+        if self.access_token and self.refresh_token:  # already set (e.g. by get_token()), nothing to do
+            return
 
-        self._session_token = login_result['sessionToken']
+        cookies = self._get_cookies('https://www.tennistv.com/')  # otherwise fall back to cookies for the site
+        if not cookies.get('access_token') or not cookies.get('refresh_token'):  # both cookies are required
+            self.raise_login_required()
+        self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value  # adopt the cookie values as tokens
 
-    def _real_initialize(self):
-        self._login()
+    def _download_session_json(self, video_id, entryid):  # fetch the session JSON for entryid; callers read its 'KSession' key
+        return self._download_json(
+            f'https://atppayments.streamamg.com/api/v1/session/ksession/?lang=en&apijwttoken={self.access_token}&entryId={entryid}',  # NOTE: the access token is passed in the query string
+            video_id, 'Downloading ksession token', 'Failed to download ksession token', headers=self._HEADERS)
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
 
-        internal_id = self._search_regex(r'video=([\w-]+)', webpage, 'internal video id')
-
-        headers = {
-            'Origin': 'https://www.tennistv.com',
-            'authorization': 'ATP %s' % self._session_token,
-            'content-type': 'application/json',
-            'Referer': url,
-        }
-        check_data = {
-            'videoID': internal_id,
-            'VideoUrlType': 'HLS',
-        }
-        check_json = json.dumps(check_data).encode('utf-8')
-        check_result = self._download_json(
-            'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
-            video_id, note='Checking video authorization', headers=headers, data=check_json)
-        formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
+        entryid = self._search_regex(r'data-entry-id=["\']([^"\']+)', webpage, 'entryID')  # entry id comes from the page's data-entry-id attribute
+        session_json = self._download_session_json(video_id, entryid)  # first attempt with the current access token
 
-        vdata = self._download_json(
-            'https://www.tennistv.com/api/en/v2/none/common/video/%s' % video_id,
-            video_id, headers=headers)
+        k_session = session_json.get('KSession')
+        if k_session is None:  # no KSession: refresh the tokens and retry once
+            self.get_token(video_id, {
+                'grant_type': 'refresh_token',
+                'refresh_token': self.refresh_token,
+                'client_id': 'tennis-tv-web',
+            })
+            k_session = self._download_session_json(video_id, entryid).get('KSession')  # the retry response itself is not kept
+            if k_session is None:  # still missing after the refresh: give up
+                raise ExtractorError('Failed to get KSession, possibly a premium video', expected=True)
 
-        timestamp = unified_timestamp(vdata['timestamp'])
-        thumbnail = vdata['video']['thumbnailUrl']
-        description = vdata['displayText']['description']
-        title = vdata['video']['title']
+        if session_json.get('ErrorMessage'):  # NOTE(review): inspects the first response only, even when the retry above succeeded
+            self.report_warning(session_json['ErrorMessage'])
 
-        series = vdata['tour']
-        venue = vdata['displayText']['venue']
-        round_str = vdata['seo']['round']
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id)  # manifest URL built from the class template
 
         return {
             'id': video_id,
-            'title': title,
-            'description': description,
+            'title': self._generic_title('', webpage),  # title is derived from the webpage, not from an API response
+            'description': self._html_search_regex(
+                (r'<span itemprop="description" content=["\']([^"\']+)["\']>', *self._og_regexes('description')),  # itemprop span first, then the og:description patterns
+                webpage, 'description', fatal=False),
+            'thumbnail': f'https://open.http.mp.streamamg.com/p/{self._PARTNER_ID}/sp/{self._PARTNER_ID}00/thumbnail/entry_id/{entryid}/version/100001/height/1920',  # built from the partner and entry ids rather than scraped
+            'timestamp': unified_timestamp(self._html_search_regex(
+                r'<span itemprop="uploadDate" content=["\']([^"\']+)["\']>', webpage, 'upload time', fatal=False)),
+            'series': self._html_search_regex(r'data-series\s*?=\s*?"(.*?)"', webpage, 'series', fatal=False) or None,  # a falsy result becomes None
+            'season': self._html_search_regex(r'data-tournament-city\s*?=\s*?"(.*?)"', webpage, 'season', fatal=False) or None,  # tournament city is reported as the season
+            'episode': self._html_search_regex(r'data-round\s*?=\s*?"(.*?)"', webpage, 'round', fatal=False) or None,  # round is reported as the episode
             'formats': formats,
-            'thumbnail': thumbnail,
-            'timestamp': timestamp,
-            'series': series,
-            'season': venue,
-            'episode': round_str,
+            'subtitles': subtitles,
         }