jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/viously.py
[ie/Viously] Add extractor (#8927)
[yt-dlp.git] / yt_dlp / extractor / viously.py
1 import base64
2 import re
3
4 from .common import InfoExtractor
5 from ..utils import (
6 extract_attributes,
7 int_or_none,
8 parse_iso8601,
9 )
10 from ..utils.traversal import traverse_obj
11
12
class ViouslyIE(InfoExtractor):
    """Extractor for Viously players found in the markup of a webpage.

    No URL pattern is declared (``_VALID_URL`` is ``False``); videos are
    located through ``_extract_from_webpage``.
    """

    _VALID_URL = False
    _WEBPAGE_TESTS = [{
        'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
        'md5': '37a6c3381599381ff53a7e1e0575c0bc',
        'info_dict': {
            'id': 'F_xQzS2jwb3',
            'ext': 'mp4',
            'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
            'age_limit': 0,
            'upload_date': '20230328',
            'timestamp': 1680037507,
            'duration': 3716,
            'categories': ['motors'],
        }
    }]

    # Opening <div> tag whose double-quoted class list contains either
    # "vsly-player" or "viously-player" as a whole word.
    _PLAYER_TAG_RE = r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>'

    # Character-for-character mapping from the alphabet used by the export
    # endpoint's payload to the standard Base64 alphabet.
    _EXPORT_TRANSLATION = str.maketrans(
        'VIOUSLYABCDEFGHJKMNPQRTWXZviouslyabcdefghjkmnpqrtwxz9876543210+/=',
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=')

    @staticmethod
    def _decode_export(text):
        """Decode an export payload into text.

        The input is first mapped onto the standard Base64 alphabet, then
        Base64-decoded, interpreted as UTF-8, and finally stripped of
        leading/trailing NUL characters.
        """
        standard_b64 = text.translate(ViouslyIE._EXPORT_TRANSLATION)
        raw = base64.b64decode(standard_b64)
        return raw.decode('utf-8').strip('\x00')

    def _extract_from_webpage(self, url, webpage):
        """Yield one info dict per Viously player tag found in *webpage*.

        A player is skipped when no HLS formats could be extracted for it.
        The metadata download is non-fatal, so a yielded entry may carry
        only ``id`` and ``formats``.
        """
        player_tags = re.findall(self._PLAYER_TAG_RE, webpage)
        if not player_tags:
            return

        # The video id is the ``id`` attribute of each matched <div> tag.
        video_ids = traverse_obj(player_tags, (..., {extract_attributes}, 'id'))
        for video_id in video_ids:
            hls_formats = self._extract_m3u8_formats(
                f'https://www.viously.com/video/hls/{video_id}/index.m3u8', video_id, fatal=False)
            if hls_formats:
                export = self._download_json(
                    f'https://www.viously.com/export/json/{video_id}', video_id,
                    transform_source=self._decode_export, fatal=False)
                metadata = traverse_obj(export, ('video', {
                    'title': ('title', {str}),
                    'description': ('description', {str}),
                    'duration': ('duration', {int_or_none}),
                    'timestamp': ('iso_date', {parse_iso8601}),
                    # Wrap the single category name in a list; drop it when empty.
                    'categories': ('category', 'name', {str}, {lambda name: [name] if name else None}),
                }))
                yield {
                    'id': video_id,
                    'formats': hls_formats,
                    **metadata,
                }