]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/dctp.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / dctp.py
1 from .common import InfoExtractor
2 from ..compat import compat_str
3 from ..utils import (
4 float_or_none,
5 int_or_none,
6 unified_timestamp,
7 url_or_none,
8 )
9
10
class DctpTvIE(InfoExtractor):
    """Extractor for film pages on dctp.tv (URLs matched by ``_VALID_URL``)."""

    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # 4x3 sample
        'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
        'md5': '3ffbd1556c3fe210724d7088fad723e3',
        'info_dict': {
            'id': '95eaa4f33dad413aa17b4ee613cccc6c',
            'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
            'ext': 'm4v',
            'title': 'Videoinstallation für eine Kaufhausfassade',
            'description': 'Kurzfilm',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 71.24,
            'timestamp': 1302172322,
            'upload_date': '20110407',
        },
    }, {
        # 16x9 sample
        'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
        'only_matching': True,
    }]

    # Root of the JSON REST API that _real_extract queries.
    _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'

    def _real_extract(self, url):
        """Build the info dict for the film page at *url*.

        Downloads three JSON documents in sequence (API version, slug
        lookup, media record), derives the format list from the media
        UUID and aspect ratio, and collects thumbnails from the media
        record's ``images`` list.

        Raises ``KeyError`` if ``version_name``, ``object_id``, ``uuid``
        or ``title`` is absent from the corresponding JSON document.
        """
        slug = self._match_id(url)

        # The REST API path is namespaced by the currently published version.
        version_info = self._download_json(
            '%s/version.json' % self._BASE_URL, slug,
            'Downloading version JSON')
        api_root = '%s/%s/restapi' % (
            self._BASE_URL, version_info['version_name'])

        # The slug document yields the object id that keys the media record.
        slug_info = self._download_json(
            '%s/slugs/%s.json' % (api_root, slug), slug,
            'Downloading video info JSON')
        media = self._download_json(
            '%s/media/%s.json' % (api_root, compat_str(slug_info['object_id'])),
            slug, 'Downloading media JSON')

        uuid = media['uuid']
        title = media['title']
        is_wide = media.get('is_wide')

        # Every video gets a "0500" variant in its own aspect ratio; wide
        # videos additionally get a 720p variant.
        variants = ['0500_' + ('16x9' if is_wide else '4x3')]
        if is_wide:
            variants.append('720p')

        formats = []
        for variant in variants:
            # After this step one '%s' placeholder remains for the host name.
            url_template = 'https://%%s/%s_dctp_%s.m4v' % (uuid, variant)
            formats.append({
                'format_id': 'hls-' + variant,
                'url': url_template % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
                'protocol': 'm3u8_native',
            })
            formats.append({
                'format_id': 's3-' + variant,
                'url': url_template % 'completed-media.s3.amazonaws.com',
            })
            formats.append({
                'format_id': 'http-' + variant,
                'url': url_template % 'cdn-media.dctp.tv',
            })

        thumbnails = []
        images = media.get('images')
        if isinstance(images, list):
            for entry in images:
                # Non-dict entries and entries without a usable URL are skipped.
                thumb_url = (
                    url_or_none(entry.get('url'))
                    if isinstance(entry, dict) else None)
                if thumb_url:
                    thumbnails.append({
                        'url': thumb_url,
                        'width': int_or_none(entry.get('width')),
                        'height': int_or_none(entry.get('height')),
                    })

        return {
            'id': uuid,
            'display_id': slug,
            'title': title,
            'alt_title': media.get('subtitle'),
            # Fall back to the teaser when the description is missing or empty.
            'description': media.get('description') or media.get('teaser'),
            'timestamp': unified_timestamp(media.get('created')),
            # Source value is in milliseconds; scale=1000 divides it down.
            'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
            'thumbnails': thumbnails,
            'formats': formats,
        }