jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/trovo.py
[ie/crunchyroll] Fix stream extraction (#10005)
[yt-dlp.git] / yt_dlp / extractor / trovo.py
index c049025a3edc37a7877f7a6a1650cbc39723dc73..545a67275ad09194dc43c38ccc88f474560fc0ea 100644 (file)
@@ -9,6 +9,7 @@
     format_field,
     int_or_none,
     str_or_none,
+    traverse_obj,
     try_get,
 )
 
@@ -26,7 +27,7 @@ def _call_api(self, video_id, data):
         resp = self._download_json(
             url, video_id, data=json.dumps([data]).encode(), headers={'Accept': 'application/json'},
             query={
-                'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)),
+                'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)),
             })[0]
         if 'errors' in resp:
             raise ExtractorError(f'Trovo said: {resp["errors"][0]["message"]}')
@@ -38,12 +39,32 @@ def _extract_streamer_info(self, data):
         return {
             'uploader': streamer_info.get('nickName'),
             'uploader_id': str_or_none(streamer_info.get('uid')),
-            'uploader_url': format_field(username, template='https://trovo.live/%s'),
+            'uploader_url': format_field(username, None, 'https://trovo.live/%s'),
         }
 
 
 class TrovoIE(TrovoBaseIE):
-    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
+    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:s/)?(?!(?:clip|video)/)(?P<id>(?!s/)[^/?&#]+(?![^#]+[?&]vid=))'
+    _TESTS = [{
+        'url': 'https://trovo.live/Exsl',
+        'only_matching': True,
+    }, {
+        'url': 'https://trovo.live/s/SkenonSLive/549759191497',
+        'only_matching': True,
+    }, {
+        'url': 'https://trovo.live/s/zijo987/208251706',
+        'info_dict': {
+            'id': '104125853_104125853_1656439572',
+            'ext': 'flv',
+            'uploader_url': 'https://trovo.live/zijo987',
+            'uploader_id': '104125853',
+            'thumbnail': 'https://livecover.trovo.live/screenshot/73846_104125853_104125853-2022-06-29-04-00-22-852x480.jpg',
+            'uploader': 'zijo987',
+            'title': '💥IGRAMO IGRICE UPADAJTE💥2500/5000 2022-06-28 22:01',
+            'live_status': 'is_live',
+        },
+        'skip': 'May not be live'
+    }]
 
     def _real_extract(self, url):
         username = self._match_id(url)
@@ -71,9 +92,9 @@ def _real_extract(self, url):
                 'format_id': format_id,
                 'height': int_or_none(format_id[:-1]) if format_id else None,
                 'url': play_url,
+                'tbr': stream_info.get('bitrate'),
                 'http_headers': self._HEADERS,
             })
-        self._sort_formats(formats)
 
         info = {
             'id': program_id,
@@ -87,7 +108,7 @@ def _real_extract(self, url):
 
 
 class TrovoVodIE(TrovoBaseIE):
-    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
+    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video|s)/(?:[^/]+/\d+[^#]*[?&]vid=)?(?P<id>(?<!/s/)[^/?&#]+)'
     _TESTS = [{
         'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos',
         'params': {'getcomments': True},
@@ -108,9 +129,49 @@ class TrovoVodIE(TrovoBaseIE):
             'uploader_url': 'https://trovo.live/OneTappedYou',
             'thumbnail': r're:^https?://.*\.jpg',
         },
+    }, {
+        'url': 'https://trovo.live/s/SkenonSLive/549759191497?vid=ltv-100829718_100829718_387702301737980280',
+        'info_dict': {
+            'id': 'ltv-100829718_100829718_387702301737980280',
+            'ext': 'mp4',
+            'timestamp': 1654909624,
+            'thumbnail': 'http://vod.trovo.live/1f09baf0vodtransger1301120758/ef9ea3f0387702301737980280/coverBySnapshot/coverBySnapshot_10_0.jpg',
+            'uploader_id': '100829718',
+            'uploader': 'SkenonSLive',
+            'title': 'Trovo u secanju, uz par modova i muzike :)',
+            'uploader_url': 'https://trovo.live/SkenonSLive',
+            'duration': 10830,
+            'view_count': int,
+            'like_count': int,
+            'upload_date': '20220611',
+            'comment_count': int,
+            'categories': ['Minecraft'],
+        },
+        'skip': 'Not available',
+    }, {
+        'url': 'https://trovo.live/s/Trovo/549756886599?vid=ltv-100264059_100264059_387702304241698583',
+        'info_dict': {
+            'id': 'ltv-100264059_100264059_387702304241698583',
+            'ext': 'mp4',
+            'timestamp': 1661479563,
+            'thumbnail': 'http://vod.trovo.live/be5ae591vodtransusw1301120758/cccb9915387702304241698583/coverBySnapshot/coverBySnapshot_10_0.jpg',
+            'uploader_id': '100264059',
+            'uploader': 'Trovo',
+            'title': 'Dev Corner 8/25',
+            'uploader_url': 'https://trovo.live/Trovo',
+            'duration': 3753,
+            'view_count': int,
+            'like_count': int,
+            'upload_date': '20220826',
+            'comment_count': int,
+            'categories': ['Talk Shows'],
+        },
     }, {
         'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
         'only_matching': True,
+    }, {
+        'url': 'https://trovo.live/s/SkenonSLive/549759191497?foo=bar&vid=ltv-100829718_100829718_387702301737980280',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -120,22 +181,20 @@ def _real_extract(self, url):
         # however that seems unreliable - sometimes it randomly doesn't return the data,
         # at least when using a non-residential IP.
         resp = self._call_api(vid, data={
-            'operationName': 'batchGetVodDetailInfo',
+            'operationName': 'vod_VodReaderService_BatchGetVodDetailInfo',
             'variables': {
                 'params': {
                     'vids': [vid],
                 },
             },
-            'extensions': {
-                'persistedQuery': {
-                    'version': 1,
-                    'sha256Hash': 'ceae0355d66476e21a1dd8e8af9f68de95b4019da2cda8b177c9a2255dad31d0',
-                },
-            },
+            'extensions': {},
         })
-        vod_detail_info = resp['VodDetailInfos'][vid]
-        vod_info = vod_detail_info['vodInfo']
-        title = vod_info['title']
+
+        vod_detail_info = traverse_obj(resp, ('VodDetailInfos', vid), expected_type=dict)
+        if not vod_detail_info:
+            raise ExtractorError('This video not found or not available anymore', expected=True)
+        vod_info = vod_detail_info.get('vodInfo')
+        title = vod_info.get('title')
 
         if try_get(vod_info, lambda x: x['playbackRights']['playbackRights'] != 'Normal'):
             playback_rights_setting = vod_info['playbackRights']['playbackRightsSetting']
@@ -162,7 +221,6 @@ def _real_extract(self, url):
                 'url': play_url,
                 'http_headers': self._HEADERS,
             })
-        self._sort_formats(formats)
 
         category = vod_info.get('categoryName')
         get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
@@ -186,7 +244,7 @@ def _real_extract(self, url):
     def _get_comments(self, vid):
         for page in itertools.count(1):
             comments_json = self._call_api(vid, data={
-                'operationName': 'getCommentList',
+                'operationName': 'public_CommentProxyService_GetCommentList',
                 'variables': {
                     'params': {
                         'appInfo': {
@@ -198,10 +256,7 @@ def _get_comments(self, vid):
                     },
                 },
                 'extensions': {
-                    'persistedQuery': {
-                        'version': 1,
-                        'sha256Hash': 'be8e5f9522ddac7f7c604c0d284fd22481813263580849926c4c66fb767eed25',
-                    },
+                    'singleReq': 'true',
                 },
             })
             for comment in comments_json['commentList']:
@@ -224,33 +279,37 @@ def _get_comments(self, vid):
 
 
 class TrovoChannelBaseIE(TrovoBaseIE):
-    def _get_vod_json(self, page, uid):
-        raise NotImplementedError('This method must be implemented by subclasses')
-
-    def _entries(self, uid):
+    def _entries(self, spacename):
         for page in itertools.count(1):
-            vod_json = self._get_vod_json(page, uid)
+            vod_json = self._call_api(spacename, data={
+                'operationName': self._OPERATION,
+                'variables': {
+                    'params': {
+                        'terminalSpaceID': {
+                            'spaceName': spacename,
+                        },
+                        'currPage': page,
+                        'pageSize': 99,
+                    },
+                },
+                'extensions': {
+                    'singleReq': 'true',
+                },
+            })
             vods = vod_json.get('vodInfos', [])
             for vod in vods:
+                vid = vod.get('vid')
+                room = traverse_obj(vod, ('spaceInfo', 'roomID'))
                 yield self.url_result(
-                    'https://trovo.live/%s/%s' % (self._TYPE, vod.get('vid')),
+                    f'https://trovo.live/s/{spacename}/{room}?vid={vid}',
                     ie=TrovoVodIE.ie_key())
-            has_more = vod_json['hasMore']
+            has_more = vod_json.get('hasMore')
             if not has_more:
                 break
 
     def _real_extract(self, url):
-        id = self._match_id(url)
-        live_info = self._call_api(id, data={
-            'operationName': 'live_LiveReaderService_GetLiveInfo',
-            'variables': {
-                'params': {
-                    'userName': id,
-                },
-            },
-        })
-        uid = str(live_info['streamerInfo']['uid'])
-        return self.playlist_result(self._entries(uid), playlist_id=uid)
+        spacename = self._match_id(url)
+        return self.playlist_result(self._entries(spacename), playlist_id=spacename)
 
 
 class TrovoChannelVodIE(TrovoChannelBaseIE):
@@ -261,29 +320,11 @@ class TrovoChannelVodIE(TrovoChannelBaseIE):
         'url': 'trovovod:OneTappedYou',
         'playlist_mincount': 24,
         'info_dict': {
-            'id': '100719456',
+            'id': 'OneTappedYou',
         },
     }]
 
-    _TYPE = 'video'
-
-    def _get_vod_json(self, page, uid):
-        return self._call_api(uid, data={
-            'operationName': 'getChannelLtvVideoInfos',
-            'variables': {
-                'params': {
-                    'channelID': int(uid),
-                    'pageSize': 99,
-                    'currPage': page,
-                },
-            },
-            'extensions': {
-                'persistedQuery': {
-                    'version': 1,
-                    'sha256Hash': '78fe32792005eab7e922cafcdad9c56bed8bbc5f5df3c7cd24fcb84a744f5f78',
-                },
-            },
-        })
+    _OPERATION = 'vod_VodReaderService_GetChannelLtvVideoInfos'
 
 
 class TrovoChannelClipIE(TrovoChannelBaseIE):
@@ -294,26 +335,8 @@ class TrovoChannelClipIE(TrovoChannelBaseIE):
         'url': 'trovoclip:OneTappedYou',
         'playlist_mincount': 29,
         'info_dict': {
-            'id': '100719456',
+            'id': 'OneTappedYou',
         },
     }]
 
-    _TYPE = 'clip'
-
-    def _get_vod_json(self, page, uid):
-        return self._call_api(uid, data={
-            'operationName': 'getChannelClipVideoInfos',
-            'variables': {
-                'params': {
-                    'channelID': int(uid),
-                    'pageSize': 99,
-                    'currPage': page,
-                },
-            },
-            'extensions': {
-                'persistedQuery': {
-                    'version': 1,
-                    'sha256Hash': 'e7924bfe20059b5c75fc8ff9e7929f43635681a7bdf3befa01072ed22c8eff31',
-                },
-            },
-        })
+    _OPERATION = 'vod_VodReaderService_GetChannelClipVideoInfos'