]>
Commit | Line | Data |
---|---|---|
3a30508b S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
a0904c5d YCH |
5 | from ..utils import ( |
6 | determine_ext, | |
564275e2 RA |
7 | int_or_none, |
8 | parse_iso8601, | |
9 | try_get, | |
a0904c5d | 10 | ) |
3a30508b S |
11 | |
12 | ||
class TelegraafIE(InfoExtractor):
    """Extractor for telegraaf.nl video pages (``/video/<numeric id>``)."""

    _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
        'info_dict': {
            'id': 'gaMItuoSeUg2',
            'ext': 'mp4',
            'title': 'Historisch scheepswrak slaat na 100 jaar los',
            'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 55,
            'timestamp': 1572805527,
            'upload_date': '20191103',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve a telegraaf.nl video page URL into an info dict.

        The numeric id matched from the URL is used as the article uid in a
        GraphQL query that yields the article's first ``videoId``. That id is
        then used to fetch the playlist JSON, whose first item supplies the
        metadata and the adaptive/progressive stream locations.

        Raises KeyError/IndexError when the GraphQL or playlist response
        lacks the mandatory fields (video id, playlist item, title).
        """
        article_id = self._match_id(url)

        # Whitespace inside the query is insignificant to GraphQL; it is
        # indented here purely for readability.
        video_id = self._download_json(
            'https://www.telegraaf.nl/graphql', article_id, query={
                'query': '''{
  article(uid: %s) {
    videos {
      videoId
    }
  }
}''' % article_id,
            })['data']['article']['videos'][0]['videoId']

        item = self._download_json(
            'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
            video_id)['items'][0]
        title = item['title']

        formats = []
        locations = item.get('locations') or {}

        # `or []` rather than a .get() default: the default only applies when
        # the key is missing, while an explicit JSON null would come back as
        # None and break the iteration.
        for location in locations.get('adaptive') or []:
            manifest_url = location.get('src')
            if not manifest_url:
                continue
            ext = determine_ext(manifest_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    manifest_url, video_id, mpd_id='dash', fatal=False))
            else:
                self.report_warning('Unknown adaptive format %s' % ext)

        for location in locations.get('progressive') or []:
            # Only the first source of each progressive location is used;
            # try_get swallows missing keys / empty lists and yields None.
            src = try_get(location, lambda x: x['sources'][0]['src'])
            if not src:
                continue
            label = location.get('label')
            formats.append({
                'url': src,
                'width': int_or_none(location.get('width')),
                'height': int_or_none(location.get('height')),
                'format_id': 'http' + ('-%s' % label if label else ''),
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': item.get('description'),
            'formats': formats,
            'duration': int_or_none(item.get('duration')),
            'thumbnail': item.get('poster'),
            # 'datecreated' is parsed with a space as the date/time delimiter.
            'timestamp': parse_iso8601(item.get('datecreated'), ' '),
        }