]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/itv.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from .brightcove
import BrightcoveNewIE
9 from ..compat
import compat_str
28 class ITVIE(InfoExtractor
):
29 _VALID_URL
= r
'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
30 _GEO_COUNTRIES
= ['GB']
32 'url': 'https://www.itv.com/hub/plebs/2a1873a0002',
36 'title': 'Plebs - The Orgy',
37 'description': 'md5:4d7159af53ebd5b36e8b3ec82a41fdb4',
41 'thumbnail': r
're:https?://hubimages\.itv\.com/episode/2_1873_0002'
45 'skip_download': True,
48 'url': 'https://www.itv.com/hub/the-jonathan-ross-show/2a1166a0209',
52 'title': 'The Jonathan Ross Show - Series 17 - Episode 8',
53 'description': 'md5:3023dcdd375db1bc9967186cdb3f1399',
54 'series': 'The Jonathan Ross Show',
57 'thumbnail': r
're:https?://hubimages\.itv\.com/episode/2_1873_0002'
61 'skip_download': True,
64 # unavailable via data-playlist-url
65 'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
66 'only_matching': True,
69 'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
70 'only_matching': True,
73 'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
74 'only_matching': True,
77 def _generate_api_headers(self
, hmac
):
79 'Accept': 'application/vnd.itv.vod.playlist.v2+json',
80 'Content-Type': 'application/json',
82 }, self
.geo_verification_headers())
84 def _call_api(self
, video_id
, playlist_url
, headers
, platform_tag
, featureset
, fatal
=True):
85 return self
._download
_json
(
86 playlist_url
, video_id
, data
=json
.dumps({
93 'manufacturer': 'Safari',
105 'variantAvailability': {
110 'platformTag': platform_tag
112 }).encode(), headers
=headers
, fatal
=fatal
)
114 def _get_subtitles(self
, video_id
, variants
, ios_playlist_url
, headers
, *args
, **kwargs
):
116 # Prefer last matching featureset
117 # See: https://github.com/yt-dlp/yt-dlp/issues/986
118 platform_tag_subs
, featureset_subs
= next(
119 ((platform_tag
, featureset
)
120 for platform_tag
, featuresets
in reversed(list(variants
.items())) for featureset
in featuresets
121 if try_get(featureset
, lambda x
: x
[2]) == 'outband-webvtt'),
124 if platform_tag_subs
and featureset_subs
:
125 subs_playlist
= self
._call
_api
(
126 video_id
, ios_playlist_url
, headers
, platform_tag_subs
, featureset_subs
, fatal
=False)
127 subs
= try_get(subs_playlist
, lambda x
: x
['Playlist']['Video']['Subtitles'], list) or []
129 if not isinstance(sub
, dict):
131 href
= url_or_none(sub
.get('Href'))
134 subtitles
.setdefault('en', []).append({'url': href}
)
137 def _real_extract(self
, url
):
138 video_id
= self
._match
_id
(url
)
139 webpage
= self
._download
_webpage
(url
, video_id
)
140 params
= extract_attributes(self
._search
_regex
(
141 r
'(?s)(<[^>]+id="video"[^>]*>)', webpage
, 'params'))
142 variants
= self
._parse
_json
(
143 try_get(params
, lambda x
: x
['data-video-variants'], compat_str
) or '{}',
144 video_id
, fatal
=False)
145 # Prefer last matching featureset
146 # See: https://github.com/yt-dlp/yt-dlp/issues/986
147 platform_tag_video
, featureset_video
= next(
148 ((platform_tag
, featureset
)
149 for platform_tag
, featuresets
in reversed(list(variants
.items())) for featureset
in featuresets
150 if set(try_get(featureset
, lambda x
: x
[:2]) or []) == {'aes', 'hls'}
),
152 if not platform_tag_video
or not featureset_video
:
153 raise ExtractorError('No downloads available', expected
=True, video_id
=video_id
)
155 ios_playlist_url
= params
.get('data-video-playlist') or params
['data-video-id']
156 headers
= self
._generate
_api
_headers
(params
['data-video-hmac'])
157 ios_playlist
= self
._call
_api
(
158 video_id
, ios_playlist_url
, headers
, platform_tag_video
, featureset_video
)
160 video_data
= try_get(ios_playlist
, lambda x
: x
['Playlist']['Video'], dict) or {}
161 ios_base_url
= video_data
.get('Base')
163 for media_file
in (video_data
.get('MediaFiles') or []):
164 href
= media_file
.get('Href')
168 href
= ios_base_url
+ href
169 ext
= determine_ext(href
)
171 formats
.extend(self
._extract
_m
3u8_formats
(
172 href
, video_id
, 'mp4', entry_protocol
='m3u8_native',
173 m3u8_id
='hls', fatal
=False))
178 self
._sort
_formats
(formats
)
179 info
= self
._search
_json
_ld
(webpage
, video_id
, default
={})
181 json_ld
= self
._parse
_json
(self
._search
_regex
(
182 JSON_LD_RE
, webpage
, 'JSON-LD', '{}',
183 group
='json_ld'), video_id
, fatal
=False)
184 if json_ld
and json_ld
.get('@type') == 'BreadcrumbList':
185 for ile
in (json_ld
.get('itemListElement:') or []):
186 item
= ile
.get('item:') or {}
187 if item
.get('@type') == 'TVEpisode':
188 item
['@context'] = 'http://schema.org'
189 info
= self
._json
_ld
(item
, video_id
, fatal
=False) or {}
193 thumbnail_url
= try_get(params
, lambda x
: x
['data-video-posterframe'], compat_str
)
196 'url': thumbnail_url
.format(width
=1920, height
=1080, quality
=100, blur
=0, bg
='false'),
200 'url': urljoin(base_url(thumbnail_url
), url_basename(thumbnail_url
)),
204 thumbnail_url
= self
._html
_search
_meta
(['og:image', 'twitter:image'], webpage
, default
=None)
207 'url': thumbnail_url
,
209 self
._remove
_duplicate
_formats
(thumbnails
)
213 'title': self
._html
_search
_meta
(['og:title', 'twitter:title'], webpage
),
215 'subtitles': self
.extract_subtitles(video_id
, variants
, ios_playlist_url
, headers
),
216 'duration': parse_duration(video_data
.get('Duration')),
217 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage
)),
218 'thumbnails': thumbnails
222 class ITVBTCCIE(InfoExtractor
):
223 _VALID_URL
= r
'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
225 'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
227 'id': 'btcc-2019-brands-hatch-gp-race-action',
228 'title': 'BTCC 2019: Brands Hatch GP race action',
230 'playlist_count': 12,
232 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
234 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
235 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32'
239 BRIGHTCOVE_URL_TEMPLATE
= 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
241 def _real_extract(self
, url
):
242 playlist_id
= self
._match
_id
(url
)
244 webpage
= self
._download
_webpage
(url
, playlist_id
)
247 self
._search
_nextjs
_data
(webpage
, playlist_id
),
248 lambda x
: x
['props']['pageProps']['article']['body']['content']) or []
251 for video
in json_map
:
252 if not any(video
['data'].get(attr
) == 'Brightcove' for attr
in ('name', 'type')):
254 video_id
= video
['data']['id']
255 account_id
= video
['data']['accountId']
256 player_id
= video
['data']['playerId']
257 entries
.append(self
.url_result(
258 smuggle_url(self
.BRIGHTCOVE_URL_TEMPLATE
% (account_id
, player_id
, video_id
), {
259 # ITV does not like some GB IP ranges, so here are some
260 # IP blocks it accepts
262 '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
266 ie
=BrightcoveNewIE
.ie_key(), video_id
=video_id
))
268 title
= self
._og
_search
_title
(webpage
, fatal
=False)
270 return self
.playlist_result(entries
, playlist_id
, title
)