]>
Commit | Line | Data |
---|---|---|
3a30508b | 1 | from .common import InfoExtractor |
a0904c5d YCH |
2 | from ..utils import ( |
3 | determine_ext, | |
564275e2 RA |
4 | int_or_none, |
5 | parse_iso8601, | |
6 | try_get, | |
a0904c5d | 7 | ) |
3a30508b S |
8 | |
9 | ||
class TelegraafIE(InfoExtractor):
    """Extractor for telegraaf.nl video pages (``/video/<numeric id>``)."""

    _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
        'info_dict': {
            'id': 'gaMItuoSeUg2',
            'ext': 'mp4',
            'title': 'Historisch scheepswrak slaat na 100 jaar los',
            'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 55,
            'timestamp': 1572805527,
            'upload_date': '20191103',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Extract metadata and formats for the video behind ``url``.

        The numeric article id taken from the URL is first resolved to a
        video id through the GraphQL endpoint; that video id is then used to
        fetch the playlist JSON describing the media item.

        Returns an info dict with ``id``, ``title``, ``description``,
        ``formats``, ``duration``, ``thumbnail`` and ``timestamp``.

        The chained subscripts on both JSON responses and ``item['title']``
        are deliberately strict: a response without those fields raises
        instead of yielding a partial result.
        """
        article_id = self._match_id(url)

        # NOTE(review): the request identifies itself as the Android app;
        # presumably the endpoint expects that User-Agent - confirm.
        video_id = self._download_json(
            'https://app.telegraaf.nl/graphql', article_id,
            headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
            query={
                'query': '''{
  article(uid: %s) {
    videos {
      videoId
    }
  }
}''' % article_id,
            })['data']['article']['videos'][0]['videoId']

        item = self._download_json(
            'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
            video_id)['items'][0]
        title = item['title']

        formats = []
        locations = item.get('locations') or {}

        # `or []` (rather than a .get() default) also covers a key that is
        # present but null, matching how `locations` itself is guarded above.
        for location in locations.get('adaptive') or []:
            manifest_url = location.get('src')
            if not manifest_url:
                continue
            ext = determine_ext(manifest_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    manifest_url, video_id, mpd_id='dash', fatal=False))
            else:
                self.report_warning('Unknown adaptive format %s' % ext)

        for location in locations.get('progressive') or []:
            # Only the first source of each progressive entry is used.
            src = try_get(location, lambda x: x['sources'][0]['src'])
            if not src:
                continue
            label = location.get('label')
            formats.append({
                'url': src,
                'width': int_or_none(location.get('width')),
                'height': int_or_none(location.get('height')),
                'format_id': 'http' + ('-%s' % label if label else ''),
            })

        return {
            'id': video_id,
            'title': title,
            'description': item.get('description'),
            'formats': formats,
            'duration': int_or_none(item.get('duration')),
            'thumbnail': item.get('poster'),
            # NOTE(review): ' ' is presumably the date/time delimiter used
            # by 'datecreated' - confirm against parse_iso8601's signature.
            'timestamp': parse_iso8601(item.get('datecreated'), ' '),
        }