jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/dctp.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / dctp.py
CommitLineData
0865f397 1from .common import InfoExtractor
8e01f3ca
S
2from ..compat import compat_str
3from ..utils import (
4 float_or_none,
acbd0ff5
S
5 int_or_none,
6 unified_timestamp,
3052a30d 7 url_or_none,
8e01f3ca 8)
0865f397 9
48a1e514 10
class DctpTvIE(InfoExtractor):
    # Matches dctp.tv film pages; the slug after /filme/ is captured as the id.
    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            # 4x3
            'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
            'md5': '3ffbd1556c3fe210724d7088fad723e3',
            'info_dict': {
                'id': '95eaa4f33dad413aa17b4ee613cccc6c',
                'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
                'ext': 'm4v',
                'title': 'Videoinstallation für eine Kaufhausfassade',
                'description': 'Kurzfilm',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 71.24,
                'timestamp': 1302172322,
                'upload_date': '20110407',
            },
        },
        {
            # 16x9
            'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
            'only_matching': True,
        },
    ]

    # Root under which version.json and the versioned "restapi" tree are fetched.
    _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'

    def _real_extract(self, url):
        """Build the info dict for a dctp.tv film page.

        Three JSON documents are fetched in sequence: ``version.json`` (gives
        the ``version_name`` path component), the slug document for the
        display id (gives ``object_id``), and the media document for that
        object id (gives uuid, title, images and the remaining metadata).

        Raises KeyError if ``version_name``, ``object_id``, ``uuid`` or
        ``title`` is missing from the respective JSON document.
        """
        display_id = self._match_id(url)

        version_info = self._download_json(
            f'{self._BASE_URL}/version.json', display_id,
            'Downloading version JSON')

        api_root = f'{self._BASE_URL}/{version_info["version_name"]}/restapi'

        slug_info = self._download_json(
            f'{api_root}/slugs/{display_id}.json', display_id,
            'Downloading video info JSON')

        object_id = compat_str(slug_info['object_id'])
        media = self._download_json(
            f'{api_root}/media/{object_id}.json',
            display_id, 'Downloading media JSON')

        uuid = media['uuid']
        title = media['title']
        is_wide = media.get('is_wide')

        # Wide media gets a 16x9 base rendition plus a 720p one;
        # anything else only gets the 4x3 base rendition.
        if is_wide:
            renditions = ['0500_16x9', '720p']
        else:
            renditions = ['0500_4x3']

        # (format_id prefix, host, path appended after the file name, extra fields)
        delivery_variants = (
            ('hls', 'cdn-segments.dctp.tv', '/playlist.m3u8', {'protocol': 'm3u8_native'}),
            ('s3', 'completed-media.s3.amazonaws.com', '', {}),
            ('http', 'cdn-media.dctp.tv', '', {}),
        )

        formats = []
        for rendition in renditions:
            # '%%s' survives this first substitution as '%s' so that the
            # host can be filled in per delivery variant below.
            url_template = 'https://%%s/%s_dctp_%s.m4v' % (uuid, rendition)
            for prefix, host, tail, extra in delivery_variants:
                formats.append({
                    'format_id': prefix + '-' + rendition,
                    'url': url_template % host + tail,
                    **extra,
                })

        # Keep only dict entries that carry a usable URL.
        image_entries = media.get('images')
        if not isinstance(image_entries, list):
            image_entries = []
        thumbnails = []
        for entry in image_entries:
            thumb_url = url_or_none(entry.get('url')) if isinstance(entry, dict) else None
            if thumb_url:
                thumbnails.append({
                    'url': thumb_url,
                    'width': int_or_none(entry.get('width')),
                    'height': int_or_none(entry.get('height')),
                })

        description = media.get('description') or media.get('teaser')
        # duration_in_ms is divided by 1000 (presumably milliseconds -> seconds).
        duration = float_or_none(media.get('duration_in_ms'), scale=1000)

        return {
            'id': uuid,
            'display_id': display_id,
            'title': title,
            'alt_title': media.get('subtitle'),
            'description': description,
            'timestamp': unified_timestamp(media.get('created')),
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }