]>
Commit | Line | Data |
---|---|---|
a71b8d3b RA |
1 | import json |
2 | ||
ea1f5e5d | 3 | from .brightcove import BrightcoveNewIE |
e897bd82 | 4 | from .common import InfoExtractor |
a71b8d3b | 5 | from ..utils import ( |
e897bd82 SS |
6 | JSON_LD_RE, |
7 | ExtractorError, | |
ea706726 | 8 | base_url, |
29f7c58a | 9 | clean_html, |
a4ec4517 | 10 | determine_ext, |
a71b8d3b | 11 | extract_attributes, |
29f7c58a | 12 | get_element_by_class, |
30374f4d | 13 | merge_dicts, |
a71b8d3b | 14 | parse_duration, |
ea1f5e5d | 15 | smuggle_url, |
6857df60 | 16 | try_get, |
ea706726 | 17 | url_basename, |
e897bd82 | 18 | url_or_none, |
ea706726 | 19 | urljoin, |
a71b8d3b RA |
20 | ) |
21 | ||
22 | ||
class ITVIE(InfoExtractor):
    """Extractor for single episodes served under ``itv.com/hub/<show>/<id>``."""

    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
    _GEO_COUNTRIES = ['GB']
    _TESTS = [{
        'url': 'https://www.itv.com/hub/plebs/2a1873a0002',
        'info_dict': {
            'id': '2a1873a0002',
            'ext': 'mp4',
            'title': 'Plebs - The Orgy',
            'description': 'md5:4d7159af53ebd5b36e8b3ec82a41fdb4',
            'series': 'Plebs',
            'season_number': 1,
            'episode_number': 1,
            'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://www.itv.com/hub/the-jonathan-ross-show/2a1166a0209',
        'info_dict': {
            'id': '2a1166a0209',
            'ext': 'mp4',
            'title': 'The Jonathan Ross Show - Series 17 - Episode 8',
            'description': 'md5:3023dcdd375db1bc9967186cdb3f1399',
            'series': 'The Jonathan Ross Show',
            'episode_number': 8,
            'season_number': 17,
            # NOTE(review): this pattern is identical to the Plebs test above
            # even though the episode id differs - confirm it is intended.
            'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # unavailable via data-playlist-url
        'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
        'only_matching': True,
    }, {
        # InvalidVodcrid
        'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
        'only_matching': True,
    }, {
        # ContentUnavailable
        'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
        'only_matching': True,
    }]

    @staticmethod
    def _select_variant(variants, predicate):
        """Pick the ``(platform_tag, featureset)`` pair to request from the API.

        Platform tags are visited in reverse order and, within a tag, the
        featuresets in their listed order; the first featureset accepted by
        *predicate* wins.  Returns ``(None, None)`` when nothing matches or
        when *variants* is empty/``None``.
        """
        # Prefer last matching featureset
        # See: https://github.com/yt-dlp/yt-dlp/issues/986
        for platform_tag, featuresets in reversed(list((variants or {}).items())):
            for featureset in featuresets:
                if predicate(featureset):
                    return platform_tag, featureset
        return None, None

    def _generate_api_headers(self, hmac):
        """Build the playlist request headers.

        The page-supplied *hmac* is sent upper-cased, merged with the
        geo-verification headers of this extractor.
        """
        return merge_dicts({
            'Accept': 'application/vnd.itv.vod.playlist.v2+json',
            'Content-Type': 'application/json',
            'hmac': hmac.upper(),
        }, self.geo_verification_headers())

    def _call_api(self, video_id, playlist_url, headers, platform_tag, featureset, fatal=True):
        """POST a playlist request for one platform tag / featureset pair.

        The same *featureset* is sent as both the minimum and the maximum of
        the requested range.  Returns the decoded JSON response; *fatal* is
        forwarded to ``_download_json``.
        """
        return self._download_json(
            playlist_url, video_id, data=json.dumps({
                'user': {
                    'itvUserId': '',
                    'entitlements': [],
                    'token': '',
                },
                'device': {
                    'manufacturer': 'Safari',
                    'model': '5',
                    'os': {
                        'name': 'Windows NT',
                        'version': '6.1',
                        'type': 'desktop',
                    },
                },
                'client': {
                    'version': '4.1',
                    'id': 'browser',
                },
                'variantAvailability': {
                    'featureset': {
                        'min': featureset,
                        'max': featureset,
                    },
                    'platformTag': platform_tag,
                },
            }).encode(), headers=headers, fatal=fatal)

    def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, **kwargs):
        """Collect subtitle tracks from the variant advertising WebVTT.

        A featureset qualifies when its third element is ``'outband-webvtt'``.
        The playlist request is non-fatal, so any failure simply results in an
        empty mapping.  All tracks found are filed under ``'en'``.
        """
        subtitles = {}
        platform_tag_subs, featureset_subs = self._select_variant(
            variants, lambda fs: try_get(fs, lambda x: x[2]) == 'outband-webvtt')

        if platform_tag_subs and featureset_subs:
            subs_playlist = self._call_api(
                video_id, ios_playlist_url, headers, platform_tag_subs, featureset_subs, fatal=False)
            subs = try_get(subs_playlist, lambda x: x['Playlist']['Video']['Subtitles'], list) or []
            for sub in subs:
                if not isinstance(sub, dict):
                    continue
                href = url_or_none(sub.get('Href'))
                if not href:
                    continue
                subtitles.setdefault('en', []).append({'url': href})
        return subtitles

    def _real_extract(self, url):
        """Extract formats, subtitles, thumbnails and metadata for one episode.

        Raises ExtractorError (expected) when the page advertises no
        ``aes``/``hls`` variant.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # All playback parameters live as data-* attributes on the #video element.
        params = extract_attributes(self._search_regex(
            r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
        # The parse is non-fatal and may not yield a mapping; normalise to {}
        # so the "No downloads available" error below is raised rather than
        # an AttributeError on `.items()`.
        variants = self._parse_json(
            try_get(params, lambda x: x['data-video-variants'], str) or '{}',
            video_id, fatal=False) or {}
        # A video featureset is one whose first two elements are exactly
        # 'aes' and 'hls' (in either order).
        platform_tag_video, featureset_video = self._select_variant(
            variants, lambda fs: set(try_get(fs, lambda x: x[:2]) or []) == {'aes', 'hls'})
        if not platform_tag_video or not featureset_video:
            raise ExtractorError('No downloads available', expected=True, video_id=video_id)

        ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
        headers = self._generate_api_headers(params['data-video-hmac'])
        ios_playlist = self._call_api(
            video_id, ios_playlist_url, headers, platform_tag_video, featureset_video)

        video_data = try_get(ios_playlist, lambda x: x['Playlist']['Video'], dict) or {}
        ios_base_url = video_data.get('Base')
        formats = []
        for media_file in (video_data.get('MediaFiles') or []):
            href = media_file.get('Href')
            if not href:
                continue
            if ios_base_url:
                # Hrefs are relative to the playlist's base URL when one is given.
                href = ios_base_url + href
            ext = determine_ext(href)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    href, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': href,
                })

        info = self._search_json_ld(webpage, video_id, default={})
        if not info:
            # Fallback: look for a TVEpisode item nested inside a
            # BreadcrumbList JSON-LD block and parse that instead.
            json_ld = self._parse_json(self._search_regex(
                JSON_LD_RE, webpage, 'JSON-LD', '{}',
                group='json_ld'), video_id, fatal=False)
            if json_ld and json_ld.get('@type') == 'BreadcrumbList':
                for ile in (json_ld.get('itemListElement:') or []):
                    item = ile.get('item:') or {}
                    if item.get('@type') == 'TVEpisode':
                        item['@context'] = 'http://schema.org'
                        info = self._json_ld(item, video_id, fatal=False) or {}
                        break

        thumbnails = []
        thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], str)
        if thumbnail_url:
            # The posterframe is a str.format template; emit a 1080p rendition
            # plus the bare (query-less) URL as a low-preference fallback.
            thumbnails.extend([{
                'url': thumbnail_url.format(width=1920, height=1080, quality=100, blur=0, bg='false'),
                'width': 1920,
                'height': 1080,
            }, {
                'url': urljoin(base_url(thumbnail_url), url_basename(thumbnail_url)),
                'preference': -2,
            }])

        thumbnail_url = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
            })
        self._remove_duplicate_formats(thumbnails)

        # Page-derived fields come first; JSON-LD `info` is merged in after them.
        return merge_dicts({
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
            'formats': formats,
            'subtitles': self.extract_subtitles(video_id, variants, ios_playlist_url, headers),
            'duration': parse_duration(video_data.get('Duration')),
            'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
            'thumbnails': thumbnails,
        }, info)
ea1f5e5d S |
214 | |
215 | ||
class ITVBTCCIE(InfoExtractor):
    """Playlist extractor for ITV news / BTCC articles embedding Brightcove videos."""

    _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
        'info_dict': {
            'id': 'btcc-2019-brands-hatch-gp-race-action',
            'title': 'BTCC 2019: Brands Hatch GP race action',
        },
        'playlist_count': 12,
    }, {
        'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
        'info_dict': {
            'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
            'title': 'md5:6ef054dd9f069330db3dcc66cb772d32',
        },
        'playlist_count': 4,
    }]
    # Filled with (account id, player id, video id), in that order.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a playlist of the Brightcove videos embedded in the article.

        The article body is read from the page's Next.js data; every body item
        whose ``data`` names Brightcove (under ``name`` or ``type``) becomes
        one entry delegated to ``BrightcoveNewIE``.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        json_map = try_get(
            self._search_nextjs_data(webpage, playlist_id),
            lambda x: x['props']['pageProps']['article']['body']['content']) or []

        entries = []
        for video in json_map:
            # Skip body items without a dict `data` payload instead of letting
            # a KeyError/TypeError abort the whole playlist.
            data = try_get(video, lambda x: x['data'], dict)
            if not data:
                continue
            if not any(data.get(attr) == 'Brightcove' for attr in ('name', 'type')):
                continue
            video_id = data['id']
            account_id = data['accountId']
            player_id = data['playerId']
            entries.append(self.url_result(
                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), {
                    # ITV does not like some GB IP ranges, so here are some
                    # IP blocks it accepts
                    'geo_ip_blocks': [
                        '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21',
                    ],
                    'referrer': url,
                }),
                ie=BrightcoveNewIE.ie_key(), video_id=video_id))

        title = self._og_search_title(webpage, fatal=False)

        return self.playlist_result(entries, playlist_id, title)