]> jfr.im git - yt-dlp.git/commitdiff
[ie/Viously] Add extractor (#8927)
authorMax <redacted>
Tue, 9 Jan 2024 03:11:52 +0000 (03:11 +0000)
committerGitHub <redacted>
Tue, 9 Jan 2024 03:11:52 +0000 (04:11 +0100)
Replaces Turbo extractor

Authored by: nbr23, seproDev

Co-authored-by: sepro <redacted>
yt_dlp/extractor/_extractors.py
yt_dlp/extractor/turbo.py [deleted file]
yt_dlp/extractor/viously.py [new file with mode: 0644]

index 6f7a1e4f1064f81b70cf640ae317cd00fa4fc88b..557ff9447042ecde3e32d9df7c28e3122a655794 100644 (file)
     TuneInPodcastEpisodeIE,
     TuneInShortenerIE,
 )
-from .turbo import TurboIE
 from .tv2 import (
     TV2IE,
     TV2ArticleIE,
     VikiIE,
     VikiChannelIE,
 )
+from .viously import ViouslyIE
 from .viqeo import ViqeoIE
 from .viu import (
     ViuIE,
diff --git a/yt_dlp/extractor/turbo.py b/yt_dlp/extractor/turbo.py
deleted file mode 100644 (file)
index cdb7dcf..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
-    ExtractorError,
-    int_or_none,
-    qualities,
-    xpath_text,
-)
-
-
-class TurboIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-'
-    _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}'
-    _TEST = {
-        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
-        'md5': '33f4b91099b36b5d5a91f84b5bcba600',
-        'info_dict': {
-            'id': '454443',
-            'ext': 'mp4',
-            'duration': 3715,
-            'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ',
-            'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        }
-    }
-
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-
-        playlist = self._download_xml(self._API_URL.format(video_id), video_id)
-        item = playlist.find('./channel/item')
-        if item is None:
-            raise ExtractorError('Playlist item was not found', expected=True)
-
-        title = xpath_text(item, './title', 'title')
-        duration = int_or_none(xpath_text(item, './durate', 'duration'))
-        thumbnail = xpath_text(item, './visuel_clip', 'thumbnail')
-        description = self._html_search_meta('description', webpage)
-
-        formats = []
-        get_quality = qualities(['3g', 'sd', 'hq'])
-        for child in item:
-            m = re.search(r'url_video_(?P<quality>.+)', child.tag)
-            if m:
-                quality = compat_str(m.group('quality'))
-                formats.append({
-                    'format_id': quality,
-                    'url': child.text,
-                    'quality': get_quality(quality),
-                })
-
-        return {
-            'id': video_id,
-            'title': title,
-            'duration': duration,
-            'thumbnail': thumbnail,
-            'description': description,
-            'formats': formats,
-        }
diff --git a/yt_dlp/extractor/viously.py b/yt_dlp/extractor/viously.py
new file mode 100644 (file)
index 0000000..9ec7ed3
--- /dev/null
@@ -0,0 +1,60 @@
+import base64
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    int_or_none,
+    parse_iso8601,
+)
+from ..utils.traversal import traverse_obj
+
+
+class ViouslyIE(InfoExtractor):
+    _VALID_URL = False
+    _WEBPAGE_TESTS = [{
+        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
+        'md5': '37a6c3381599381ff53a7e1e0575c0bc',
+        'info_dict': {
+            'id': 'F_xQzS2jwb3',
+            'ext': 'mp4',
+            'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+            'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+            'age_limit': 0,
+            'upload_date': '20230328',
+            'timestamp': 1680037507,
+            'duration': 3716,
+            'categories': ['motors'],
+        }
+    }]
+
+    def _extract_from_webpage(self, url, webpage):
+        viously_players = re.findall(r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>', webpage)
+        if not viously_players:
+            return
+
+        def custom_decode(text):
+            STANDARD_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
+            CUSTOM_ALPHABET = 'VIOUSLYABCDEFGHJKMNPQRTWXZviouslyabcdefghjkmnpqrtwxz9876543210+/='
+            data = base64.b64decode(text.translate(str.maketrans(CUSTOM_ALPHABET, STANDARD_ALPHABET)))
+            return data.decode('utf-8').strip('\x00')
+
+        for video_id in traverse_obj(viously_players, (..., {extract_attributes}, 'id')):
+            formats = self._extract_m3u8_formats(
+                f'https://www.viously.com/video/hls/{video_id}/index.m3u8', video_id, fatal=False)
+            if not formats:
+                continue
+            data = self._download_json(
+                f'https://www.viously.com/export/json/{video_id}', video_id,
+                transform_source=custom_decode, fatal=False)
+            yield {
+                'id': video_id,
+                'formats': formats,
+                **traverse_obj(data, ('video', {
+                    'title': ('title', {str}),
+                    'description': ('description', {str}),
+                    'duration': ('duration', {int_or_none}),
+                    'timestamp': ('iso_date', {parse_iso8601}),
+                    'categories': ('category', 'name', {str}, {lambda x: [x] if x else None}),
+                })),
+            }