jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/bitchute.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / bitchute.py
index 10e7b0b2bb53b3eacb93671063381331596bacd6..c74f34c2a9679edde64804a386d44c6ed1d64e61 100644 (file)
@@ -2,13 +2,15 @@
 import re
 
 from .common import InfoExtractor
+from ..networking import HEADRequest
 from ..utils import (
     ExtractorError,
-    HEADRequest,
     OnDemandPagedList,
     clean_html,
+    extract_attributes,
     get_element_by_class,
     get_element_by_id,
+    get_element_html_by_class,
     get_elements_html_by_class,
     int_or_none,
     orderedSet,
@@ -17,6 +19,7 @@
     traverse_obj,
     unified_strdate,
     urlencode_postdata,
+    urljoin,
 )
 
 
@@ -34,6 +37,25 @@ class BitChuteIE(InfoExtractor):
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'BitChute',
             'upload_date': '20170103',
+            'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
+            'channel': 'BitChute',
+            'channel_url': 'https://www.bitchute.com/channel/bitchute/',
+        },
+    }, {
+        # test case: video with different channel and uploader
+        'url': 'https://www.bitchute.com/video/Yti_j9A-UZ4/',
+        'md5': 'f10e6a8e787766235946d0868703f1d0',
+        'info_dict': {
+            'id': 'Yti_j9A-UZ4',
+            'ext': 'mp4',
+            'title': 'Israel at War | Full Measure',
+            'description': 'md5:38cf7bc6f42da1a877835539111c69ef',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'sharylattkisson',
+            'upload_date': '20231106',
+            'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/',
+            'channel': 'Full Measure with Sharyl Attkisson',
+            'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/',
         },
     }, {
         # video not downloadable in browser, but we can recover it
@@ -48,6 +70,9 @@ class BitChuteIE(InfoExtractor):
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'BitChute',
             'upload_date': '20181113',
+            'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
+            'channel': 'BitChute',
+            'channel_url': 'https://www.bitchute.com/channel/bitchute/',
         },
         'params': {'check_formats': None},
     }, {
@@ -77,7 +102,10 @@ class BitChuteIE(InfoExtractor):
     def _check_format(self, video_url, video_id):
         urls = orderedSet(
             re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
-            for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
+            for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
+                         'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
+                         'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
+                         'seedp29xb', 'zb10-7gsop1v78'))
         for url in urls:
             try:
                 response = self._request_webpage(
@@ -87,7 +115,7 @@ def _check_format(self, video_url, video_id):
                 continue
             return {
                 'url': url,
-                'filesize': int_or_none(response.headers.get('Content-Length'))
+                'filesize': int_or_none(response.headers.get('Content-Length')),
             }
 
     def _raise_if_restricted(self, webpage):
@@ -96,6 +124,11 @@ def _raise_if_restricted(self, webpage):
             reason = clean_html(get_element_by_id('page-detail', webpage)) or page_title
             self.raise_geo_restricted(reason)
 
+    @staticmethod
+    def _make_url(html):
+        path = extract_attributes(get_element_html_by_class('spa', html) or '').get('href')
+        return urljoin('https://www.bitchute.com', path)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(
@@ -118,12 +151,19 @@ def _real_extract(self, url):
                 'Video is unavailable. Please make sure this video is playable in the browser '
                 'before reporting this issue.', expected=True, video_id=video_id)
 
+        details = get_element_by_class('details', webpage) or ''
+        uploader_html = get_element_html_by_class('creator', details) or ''
+        channel_html = get_element_html_by_class('name', details) or ''
+
         return {
             'id': video_id,
             'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
             'description': self._og_search_description(webpage, default=None),
             'thumbnail': self._og_search_thumbnail(webpage),
-            'uploader': clean_html(get_element_by_class('owner', webpage)),
+            'uploader': clean_html(uploader_html),
+            'uploader_url': self._make_url(uploader_html),
+            'channel': clean_html(channel_html),
+            'channel_url': self._make_url(channel_html),
             'upload_date': unified_strdate(self._search_regex(
                 r'at \d+:\d+ UTC on (.+?)\.', publish_date, 'upload date', fatal=False)),
             'formats': formats,
@@ -145,16 +185,18 @@ class BitChuteChannelIE(InfoExtractor):
                 'info_dict': {
                     'id': 'UGlrF9o9b-Q',
                     'ext': 'mp4',
-                    'filesize': None,
                     'title': 'This is the first video on #BitChute !',
                     'description': 'md5:a0337e7b1fe39e32336974af8173a034',
                     'thumbnail': r're:^https?://.*\.jpg$',
                     'uploader': 'BitChute',
                     'upload_date': '20170103',
+                    'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/',
+                    'channel': 'BitChute',
+                    'channel_url': 'https://www.bitchute.com/channel/bitchute/',
                     'duration': 16,
                     'view_count': int,
                 },
-            }
+            },
         ],
         'params': {
             'skip_download': True,
@@ -166,8 +208,8 @@ class BitChuteChannelIE(InfoExtractor):
         'info_dict': {
             'id': 'wV9Imujxasw9',
             'title': 'Bruce MacDonald and "The Light of Darkness"',
-            'description': 'md5:04913227d2714af1d36d804aa2ab6b1e',
-        }
+            'description': 'md5:747724ef404eebdfc04277714f81863e',
+        },
     }]
 
     _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
@@ -182,7 +224,7 @@ class BitChuteChannelIE(InfoExtractor):
             'container': 'playlist-video',
             'title': 'title',
             'description': 'description',
-        }
+        },
 
     }