]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/rokfin.py
[ie/mlbtv] Fix extraction (#10296)
[yt-dlp.git] / yt_dlp / extractor / rokfin.py
index ad53d697e75e960512523865e418be860c2e1fc3..be174655e028f6c4ccea1e3a8375b18e808c693a 100644 (file)
@@ -1,8 +1,8 @@
+import datetime as dt
 import itertools
 import json
 import re
 import urllib.parse
-from datetime import datetime
 
 from .common import InfoExtractor, SearchInfoExtractor
 from ..utils import (
@@ -38,14 +38,14 @@ class RokfinIE(InfoExtractor):
             'upload_date': '20211023',
             'timestamp': 1634998029,
             'channel': 'Jimmy Dore',
-            'channel_id': 65429,
+            'channel_id': '65429',
             'channel_url': 'https://rokfin.com/TheJimmyDoreShow',
-            'duration': 213.0,
             'availability': 'public',
             'live_status': 'not_live',
             'dislike_count': int,
             'like_count': int,
-        }
+            'duration': 213,
+        },
     }, {
         'url': 'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time',
         'info_dict': {
@@ -56,14 +56,14 @@ class RokfinIE(InfoExtractor):
             'upload_date': '20190412',
             'timestamp': 1555052644,
             'channel': 'Ron Placone',
-            'channel_id': 10,
+            'channel_id': '10',
             'channel_url': 'https://rokfin.com/RonPlacone',
             'availability': 'public',
             'live_status': 'not_live',
             'dislike_count': int,
             'like_count': int,
             'tags': ['FreeThinkingMedia^', 'RealProgressives^'],
-        }
+        },
     }, {
         'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data',
         'info_dict': {
@@ -72,8 +72,8 @@ class RokfinIE(InfoExtractor):
             'title': '"It\'s A Crazy Mess" Regional Director Blows Whistle On Pfizer\'s Vaccine Trial Data',
             'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
             'description': 'md5:324ce2d3e3b62e659506409e458b9d8e',
-            'channel': 'Ryan Cristián',
-            'channel_id': 53856,
+            'channel': 'TLAVagabond',
+            'channel_id': '53856',
             'channel_url': 'https://rokfin.com/TLAVagabond',
             'availability': 'public',
             'is_live': False,
@@ -86,7 +86,47 @@ class RokfinIE(InfoExtractor):
             'dislike_count': int,
             'like_count': int,
             'tags': ['FreeThinkingMedia^'],
-        }
+        },
+    }, {
+        'url': 'https://rokfin.com/post/126703/Brave-New-World--Aldous-Huxley-DEEPDIVE--Chpts-13--Quite-Frankly--Jay-Dyer',
+        'info_dict': {
+            'id': 'post/126703',
+            'ext': 'mp4',
+            'title': 'Brave New World - Aldous Huxley DEEPDIVE!  (Chpts 1-3) - Quite Frankly & Jay Dyer',
+            'thumbnail': r're:https://img\.production\.rokfin\.com/.+',
+            'channel': 'Jay Dyer',
+            'channel_id': '186881',
+            'channel_url': 'https://rokfin.com/jaydyer',
+            'availability': 'premium_only',
+            'live_status': 'not_live',
+            'dislike_count': int,
+            'like_count': int,
+            'timestamp': 1678213357,
+            'upload_date': '20230307',
+            'tags': ['FreeThinkingMedia^', 'OpenMind^'],
+            'description': 'md5:cb04e32e68326c9b2b251b297bacff35',
+            'duration': 3100,
+        },
+    }, {
+        'url': 'https://rokfin.com/stream/31332/The-Grayzone-live-on-Nordstream-blame-game',
+        'info_dict': {
+            'id': 'stream/31332',
+            'ext': 'mp4',
+            'title': 'The Grayzone live on Nordstream blame game',
+            'thumbnail': r're:https://image\.v\.rokfin\.com/.+',
+            'channel': 'Max Blumenthal',
+            'channel_id': '248902',
+            'channel_url': 'https://rokfin.com/MaxBlumenthal',
+            'availability': 'premium_only',
+            'live_status': 'was_live',
+            'dislike_count': int,
+            'like_count': int,
+            'timestamp': 1678475166,
+            'release_timestamp': 1678475166.0,
+            'release_date': '20230310',
+            'upload_date': '20230310',
+            'tags': ['FreeThinkingMedia^'],
+        },
     }]
 
     def _real_extract(self, url):
@@ -100,6 +140,12 @@ def _real_extract(self, url):
                        else 'not_live')
 
         video_url = traverse_obj(metadata, 'url', ('content', 'contentUrl'), expected_type=url_or_none)
+        if video_url in (None, 'fake.m3u8'):
+            video_url = format_field(self._search_regex(
+                r'https?://[^/]+/([^/]+)/storyboard.vtt',
+                traverse_obj(metadata, 'timelineUrl', ('content', 'timelineUrl'), expected_type=url_or_none),
+                video_id, default=None), None, 'https://stream.v.rokfin.com/%s.m3u8')
+
         formats, subtitles = [{'url': video_url}] if video_url else [], {}
         if determine_ext(video_url) == 'm3u8':
             formats, subtitles = self._extract_m3u8_formats_and_subtitles(
@@ -110,9 +156,8 @@ def _real_extract(self, url):
                 self.raise_login_required('This video is only available to premium users', True, method='cookies')
             elif scheduled:
                 self.raise_no_formats(
-                    f'Stream is offline; sheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
+                    f'Stream is offline; scheduled for {dt.datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
                     video_id=video_id, expected=True)
-        self._sort_formats(formats)
 
         uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
         timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000)
@@ -128,7 +173,7 @@ def _real_extract(self, url):
             'like_count': int_or_none(metadata.get('likeCount')),
             'dislike_count': int_or_none(metadata.get('dislikeCount')),
             'channel': str_or_none(traverse_obj(metadata, ('createdBy', 'name'), ('creator', 'name'))),
-            'channel_id': traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id')),
+            'channel_id': str_or_none(traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id'))),
             'channel_url': url_or_none(f'https://rokfin.com/{uploader}') if uploader else None,
             'timestamp': timestamp,
             'release_timestamp': timestamp if live_status != 'not_live' else None,
@@ -146,7 +191,7 @@ def _get_comments(self, video_id):
         for page_n in itertools.count():
             raw_comments = self._download_json(
                 f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50',
-                video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}',
+                video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}',
                 fatal=False) or {}
 
             for comment in raw_comments.get('content') or []:
@@ -158,7 +203,7 @@ def _get_comments(self, video_id):
                     'parent': 'root',
                     'like_count': int_or_none(comment.get('numLikes')),
                     'dislike_count': int_or_none(comment.get('numDislikes')),
-                    'timestamp': unified_timestamp(comment.get('postedAt'))
+                    'timestamp': unified_timestamp(comment.get('postedAt')),
                 }
 
             pages_total = int_or_none(raw_comments.get('totalPages')) or None
@@ -198,10 +243,10 @@ def _perform_login(self, username, password):
             f'{self._AUTH_BASE}/token', None,
             note='getting access credentials', errnote='error getting access credentials',
             data=urlencode_postdata({
-                'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.geturl()).fragment).get('code')[0],
+                'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.url).fragment).get('code')[0],
                 'client_id': 'web',
                 'grant_type': 'authorization_code',
-                'redirect_uri': 'https://rokfin.com/silent-check-sso.html'
+                'redirect_uri': 'https://rokfin.com/silent-check-sso.html',
             }))
 
     def _authentication_active(self):
@@ -222,7 +267,7 @@ def _download_json_using_access_token(self, url_or_request, video_id, headers={}
 
         json_string, urlh = self._download_webpage_handle(
             url_or_request, video_id, headers=headers, query=query, expected_status=401)
-        if not auth_token or urlh.code != 401 or refresh_token is None:
+        if not auth_token or urlh.status != 401 or refresh_token is None:
             return self._parse_json(json_string, video_id)
 
         self._access_mgmt_tokens = self._download_json(
@@ -231,7 +276,7 @@ def _download_json_using_access_token(self, url_or_request, video_id, headers={}
             data=urlencode_postdata({
                 'grant_type': 'refresh_token',
                 'refresh_token': refresh_token,
-                'client_id': 'web'
+                'client_id': 'web',
             }))
         headers['authorization'] = self._get_auth_token()
         if headers['authorization'] is None:
@@ -318,7 +363,7 @@ def _entries(self, channel_id, channel_name, tab):
                 data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}'
             metadata = self._download_json(
                 data_url, channel_name,
-                note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, template=" of %s")}')
+                note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}')
 
             yield from self._get_video_data(metadata)
             pages_total = int_or_none(metadata.get('totalPages')) or None
@@ -354,13 +399,13 @@ class RokfinSearchIE(SearchInfoExtractor):
         'info_dict': {
             'id': '"zelenko"',
             'title': '"zelenko"',
-        }
+        },
     }]
     _db_url = None
     _db_access_key = None
 
     def _real_initialize(self):
-        self._db_url, self._db_access_key = self._downloader.cache.load(self.ie_key(), 'auth', default=(None, None))
+        self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None))
         if not self._db_url:
             self._get_db_access_credentials()
 
@@ -369,7 +414,7 @@ def _search_results(self, query):
         for page_number in itertools.count(1):
             search_results = self._run_search_query(
                 query, data={'query': query, 'page': {'size': 100, 'current': page_number}},
-                note=f'Downloading page {page_number}{format_field(total_pages, template=" of ~%s")}')
+                note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}')
             total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none)
 
             for result in search_results.get('results') or []:
@@ -405,6 +450,6 @@ def _get_db_access_credentials(self, video_id=None):
 
             self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json')
             self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}'
-            self._downloader.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
+            self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
             return
         raise ExtractorError('Unable to extract access credentials')