]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/dctp.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / dctp.py
1 from .common import InfoExtractor
2 from ..utils import (
3 float_or_none,
4 int_or_none,
5 unified_timestamp,
6 url_or_none,
7 )
8
9
class DctpTvIE(InfoExtractor):
    # Matches dctp.tv film pages (optionally behind a '#/' fragment route);
    # the path segment after 'filme/' is captured as the id group.
    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # 4x3 film
        'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
        'md5': '3ffbd1556c3fe210724d7088fad723e3',
        'info_dict': {
            'id': '95eaa4f33dad413aa17b4ee613cccc6c',
            'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
            'ext': 'm4v',
            'title': 'Videoinstallation für eine Kaufhausfassade',
            'description': 'Kurzfilm',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 71.24,
            'timestamp': 1302172322,
            'upload_date': '20110407',
        },
    }, {
        # 16x9 film (URL matching only)
        'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
        'only_matching': True,
    }]

    # Host that serves version.json and the versioned REST API documents.
    _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'

    def _real_extract(self, url):
        # Resolution chain: URL slug -> API version -> slug record (object id)
        # -> media record, from which formats and metadata are assembled.
        slug = self._match_id(url)

        # The REST API root embeds a version name that has to be fetched first.
        version_info = self._download_json(
            f'{self._BASE_URL}/version.json', slug,
            'Downloading version JSON')
        version_name = version_info['version_name']
        api_root = f'{self._BASE_URL}/{version_name}/restapi'

        # The slug record only tells us which media object to request.
        slug_info = self._download_json(
            f'{api_root}/slugs/{slug}.json', slug,
            'Downloading video info JSON')
        object_id = str(slug_info['object_id'])

        media = self._download_json(
            f'{api_root}/media/{object_id}.json',
            slug, 'Downloading media JSON')

        # 'uuid' and 'title' are required; a missing key raises KeyError.
        uuid = media['uuid']
        title = media['title']
        is_wide = media.get('is_wide')

        # Every film gets a '0500' variant named after its aspect ratio;
        # wide films additionally get a '720p' variant.
        aspect = '16x9' if is_wide else '4x3'
        suffixes = ['0500_' + aspect]
        if is_wide:
            suffixes.append('720p')

        formats = []
        for suffix in suffixes:
            # '%s' is filled with the serving host for each delivery channel.
            url_template = f'https://%s/{uuid}_dctp_{suffix}.m4v'
            formats.append({
                'format_id': 'hls-' + suffix,
                'url': url_template % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
                'protocol': 'm3u8_native',
            })
            formats.append({
                'format_id': 's3-' + suffix,
                'url': url_template % 'completed-media.s3.amazonaws.com',
            })
            formats.append({
                'format_id': 'http-' + suffix,
                'url': url_template % 'cdn-media.dctp.tv',
            })

        # Keep only image entries that are dicts carrying a usable URL.
        thumbnails = []
        images = media.get('images')
        for image in (images if isinstance(images, list) else []):
            image_url = url_or_none(image.get('url')) if isinstance(image, dict) else None
            if image_url:
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

        # Fall back to the teaser when there is no (non-empty) description.
        description = media.get('description') or media.get('teaser')
        # The API reports the duration in milliseconds; scale it to seconds.
        duration = float_or_none(media.get('duration_in_ms'), scale=1000)

        return {
            'id': uuid,
            'display_id': slug,
            'title': title,
            'alt_title': media.get('subtitle'),
            'description': description,
            'timestamp': unified_timestamp(media.get('created')),
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
        }