]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/slideslive.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / slideslive.py
index 3d36edbbc3b15a4419b5e0d12536e74f6c596f9b..e684ac7b8e083758e3fd3e773f52b22e70c0c497 100644 (file)
@@ -1,5 +1,6 @@
 import re
 import urllib.parse
+import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -24,8 +25,8 @@ class SlidesLiveIE(InfoExtractor):
             'id': '38902413',
             'ext': 'mp4',
             'title': 'GCC IA16 backend',
-            'timestamp': 1648189972,
-            'upload_date': '20220325',
+            'timestamp': 1697793372,
+            'upload_date': '20231020',
             'thumbnail': r're:^https?://.*\.jpg',
             'thumbnails': 'count:42',
             'chapters': 'count:41',
@@ -41,8 +42,8 @@ class SlidesLiveIE(InfoExtractor):
             'id': '38935785',
             'ext': 'mp4',
             'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
-            'upload_date': '20211115',
-            'timestamp': 1636996003,
+            'upload_date': '20231020',
+            'timestamp': 1697807002,
             'thumbnail': r're:^https?://.*\.(?:jpg|png)',
             'thumbnails': 'count:640',
             'chapters': 'count:639',
@@ -58,9 +59,9 @@ class SlidesLiveIE(InfoExtractor):
             'id': '38973182',
             'ext': 'mp4',
             'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
-            'upload_date': '20220201',
+            'upload_date': '20231020',
             'thumbnail': r're:^https?://.*\.jpg',
-            'timestamp': 1643728135,
+            'timestamp': 1697822521,
             'thumbnails': 'count:3',
             'chapters': 'count:2',
             'duration': 5889,
@@ -69,37 +70,22 @@ class SlidesLiveIE(InfoExtractor):
             'skip_download': 'm3u8',
         },
     }, {
-        # service_name = youtube, only XML slides info
+        # formerly youtube, converted to native
         'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
         'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
         'info_dict': {
-            'id': 'jmg02wCJD5M',
-            'display_id': '38897546',
+            'id': '38897546',
             'ext': 'mp4',
             'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
-            'description': 'Watch full version of this video at https://slideslive.com/38897546.',
-            'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
-            'channel': 'SlidesLive Videos - G1',
-            'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
-            'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
-            'uploader': 'SlidesLive Videos - G1',
-            'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
-            'live_status': 'not_live',
-            'upload_date': '20160710',
-            'timestamp': 1618786715,
-            'duration': 6827,
-            'like_count': int,
-            'view_count': int,
-            'comment_count': int,
-            'channel_follower_count': int,
-            'age_limit': 0,
-            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'upload_date': '20231029',
+            'timestamp': 1698588144,
             'thumbnails': 'count:169',
-            'playable_in_embed': True,
-            'availability': 'unlisted',
-            'tags': [],
-            'categories': ['People & Blogs'],
             'chapters': 'count:168',
+            'duration': 6827,
+        },
+        'params': {
+            'skip_download': 'm3u8',
         },
     }, {
         # embed-only presentation, only XML slides info
@@ -110,8 +96,8 @@ class SlidesLiveIE(InfoExtractor):
             'title': 'Towards a Deep Network Architecture for Structured Smoothness',
             'thumbnail': r're:^https?://.*\.jpg',
             'thumbnails': 'count:8',
-            'timestamp': 1629671508,
-            'upload_date': '20210822',
+            'timestamp': 1697803109,
+            'upload_date': '20231020',
             'chapters': 'count:7',
             'duration': 326,
         },
@@ -127,8 +113,8 @@ class SlidesLiveIE(InfoExtractor):
             'title': 'MoReL: Multi-omics Relational Learning',
             'thumbnail': r're:^https?://.*\.(?:jpg|png)',
             'thumbnails': 'count:7',
-            'timestamp': 1654714970,
-            'upload_date': '20220608',
+            'timestamp': 1697824939,
+            'upload_date': '20231020',
             'chapters': 'count:6',
             'duration': 171,
         },
@@ -144,8 +130,8 @@ class SlidesLiveIE(InfoExtractor):
             'title': 'Decentralized Attribution of Generative Models',
             'thumbnail': r're:^https?://.*\.jpg',
             'thumbnails': 'count:16',
-            'timestamp': 1622806321,
-            'upload_date': '20210604',
+            'timestamp': 1697814901,
+            'upload_date': '20231020',
             'chapters': 'count:15',
             'duration': 306,
         },
@@ -161,8 +147,8 @@ class SlidesLiveIE(InfoExtractor):
             'title': 'Efficient Active Search for Combinatorial Optimization Problems',
             'thumbnail': r're:^https?://.*\.(?:jpg|png)',
             'thumbnails': 'count:9',
-            'timestamp': 1654714896,
-            'upload_date': '20220608',
+            'timestamp': 1697824757,
+            'upload_date': '20231020',
             'chapters': 'count:8',
             'duration': 295,
         },
@@ -176,10 +162,10 @@ class SlidesLiveIE(InfoExtractor):
             'id': '38979880',
             'ext': 'mp4',
             'title': 'The Representation Power of Neural Networks',
-            'timestamp': 1654714962,
+            'timestamp': 1697824919,
             'thumbnail': r're:^https?://.*\.(?:jpg|png)',
             'thumbnails': 'count:22',
-            'upload_date': '20220608',
+            'upload_date': '20231020',
             'chapters': 'count:21',
             'duration': 294,
         },
@@ -199,10 +185,10 @@ class SlidesLiveIE(InfoExtractor):
                 'id': '38979682',
                 'ext': 'mp4',
                 'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
-                'timestamp': 1654714920,
+                'timestamp': 1697824815,
                 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                 'thumbnails': 'count:30',
-                'upload_date': '20220608',
+                'upload_date': '20231020',
                 'chapters': 'count:31',
                 'duration': 272,
             },
@@ -212,8 +198,8 @@ class SlidesLiveIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
                 'duration': 3,
-                'timestamp': 1654714920,
-                'upload_date': '20220608',
+                'timestamp': 1697824815,
+                'upload_date': '20231020',
             },
         }, {
             'info_dict': {
@@ -221,8 +207,8 @@ class SlidesLiveIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
                 'duration': 4,
-                'timestamp': 1654714920,
-                'upload_date': '20220608',
+                'timestamp': 1697824815,
+                'upload_date': '20231020',
             },
         }],
         'params': {
@@ -241,10 +227,10 @@ class SlidesLiveIE(InfoExtractor):
                 'id': '38979481',
                 'ext': 'mp4',
                 'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
-                'timestamp': 1654714877,
+                'timestamp': 1697824716,
                 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                 'thumbnails': 'count:43',
-                'upload_date': '20220608',
+                'upload_date': '20231020',
                 'chapters': 'count:43',
                 'duration': 315,
             },
@@ -254,8 +240,8 @@ class SlidesLiveIE(InfoExtractor):
                 'ext': 'mp4',
                 'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
                 'duration': 3,
-                'timestamp': 1654714877,
-                'upload_date': '20220608',
+                'timestamp': 1697824716,
+                'upload_date': '20231020',
             },
         }],
         'params': {
@@ -274,10 +260,10 @@ class SlidesLiveIE(InfoExtractor):
             'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
             'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
             'uploader': 'SlidesLive Videos - A',
-            'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
-            'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
+            'uploader_id': '@slideslivevideos-a6075',
+            'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
             'upload_date': '20200903',
-            'timestamp': 1602599092,
+            'timestamp': 1697805922,
             'duration': 942,
             'age_limit': 0,
             'live_status': 'not_live',
@@ -302,8 +288,8 @@ class SlidesLiveIE(InfoExtractor):
             'id': '38983994',
             'ext': 'mp4',
             'title': 'Zero-Shot AutoML with Pretrained Models',
-            'timestamp': 1662384834,
-            'upload_date': '20220905',
+            'timestamp': 1697826708,
+            'upload_date': '20231020',
             'thumbnail': r're:^https?://.*\.(?:jpg|png)',
             'thumbnails': 'count:23',
             'chapters': 'count:22',
@@ -335,8 +321,8 @@ class SlidesLiveIE(InfoExtractor):
             'title': 'Towards a Deep Network Architecture for Structured Smoothness',
             'thumbnail': r're:^https?://.*\.jpg',
             'thumbnails': 'count:8',
-            'timestamp': 1629671508,
-            'upload_date': '20210822',
+            'timestamp': 1697803109,
+            'upload_date': '20231020',
             'chapters': 'count:7',
             'duration': 326,
         },
@@ -385,7 +371,7 @@ def _extract_custom_m3u8_info(self, m3u8_data):
             if not line.startswith('#EXT-SL-'):
                 continue
             tag, _, value = line.partition(':')
-            key = lookup.get(tag.lstrip('#EXT-SL-'))
+            key = lookup.get(tag[8:])
             if not key:
                 continue
             m3u8_dict[key] = value
@@ -426,7 +412,7 @@ def _real_extract(self, url):
             video_id, headers=traverse_obj(parse_qs(url), {
                 'Referer': ('embed_parent_url', -1),
                 'Origin': ('embed_container_origin', -1)}))
-        redirect_url = urlh.geturl()
+        redirect_url = urlh.url
         if 'domain_not_allowed' in redirect_url:
             domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
             if not domain:
@@ -469,11 +455,12 @@ def _real_extract(self, url):
             slides = self._download_xml(
                 player_info['slides_xml_url'], video_id, fatal=False,
                 note='Downloading slides XML', errnote='Failed to download slides info')
-            slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
-            for slide_id, slide in enumerate(slides.findall('./slide') if slides else [], 1):
-                slides_info.append((
-                    slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
-                    int_or_none(xpath_text(slide, './timeSec', 'time'))))
+            if isinstance(slides, xml.etree.ElementTree.Element):
+                slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
+                for slide_id, slide in enumerate(slides.findall('./slide')):
+                    slides_info.append((
+                        slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
+                        int_or_none(xpath_text(slide, './timeSec', 'time'))))
 
         chapters, thumbnails = [], []
         if url_or_none(player_info.get('thumbnail')):
@@ -528,7 +515,7 @@ def _real_extract(self, url):
             if service_name == 'vimeo':
                 info['url'] = smuggle_url(
                     f'https://player.vimeo.com/video/{service_id}',
-                    {'http_headers': {'Referer': url}})
+                    {'referer': url})
 
         video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
         if not video_slides:
@@ -545,7 +532,7 @@ def entries():
                 }, note='Downloading video slides info', errnote='Failed to download video slides info') or {}
 
             for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
-                if not traverse_obj(slide, ('video', 'service')) == 'yoda':
+                if traverse_obj(slide, ('video', 'service')) != 'yoda':
                     continue
                 video_path = traverse_obj(slide, ('video', 'id'))
                 cdn_hostname = traverse_obj(service_data, (