]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/itv.py
3 from .brightcove
import BrightcoveNewIE
4 from .common
import InfoExtractor
5 from ..compat
import compat_str
# Extractor for itv.com/hub programme pages (subclass of InfoExtractor).
# NOTE(review): this listing is an extracted web view of the file. The leading
# numbers are the original file's line numbers, statements are split across
# lines, and some original lines are absent, so only what is visible is documented.
24 class ITVIE(InfoExtractor
):
# Matches http(s)://[www.]itv.com/hub/<slug>/<id>; the alphanumeric final
# path segment is captured as the `id` group.
25 _VALID_URL
= r
'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
# Presumably consumed by the InfoExtractor base class for geo-restriction
# handling - confirm against the base class.
26 _GEO_COUNTRIES
= ['GB']
# Fragments of test-case entries follow; the container they belong to
# (presumably `_TESTS`) is on lines not present in this view.
28 'url': 'https://www.itv.com/hub/plebs/2a1873a0002',
32 'title': 'Plebs - The Orgy',
33 'description': 'md5:4d7159af53ebd5b36e8b3ec82a41fdb4',
37 'thumbnail': r
're:https?://hubimages\.itv\.com/episode/2_1873_0002'
41 'skip_download': True,
44 'url': 'https://www.itv.com/hub/the-jonathan-ross-show/2a1166a0209',
48 'title': 'The Jonathan Ross Show - Series 17 - Episode 8',
49 'description': 'md5:3023dcdd375db1bc9967186cdb3f1399',
50 'series': 'The Jonathan Ross Show',
# NOTE(review): this thumbnail pattern repeats the first test's episode id
# (2_1873_0002) although this test's URL id is 2a1166a0209 - confirm it is intended.
53 'thumbnail': r
're:https?://hubimages\.itv\.com/episode/2_1873_0002'
57 'skip_download': True,
60 # unavailable via data-playlist-url
61 'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033',
62 'only_matching': True,
65 'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034',
66 'only_matching': True,
69 'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024',
70 'only_matching': True,
# Build the HTTP headers for a playlist API request.
# The visible part combines a literal header dict (Accept: ITV playlist v2
# JSON media type, Content-Type: JSON) with self.geo_verification_headers().
# NOTE(review): the lines that open the expression and that use the `hmac`
# argument are not present in this view - confirm how `hmac` is sent.
73 def _generate_api_headers(self
, hmac
):
75 'Accept': 'application/vnd.itv.vod.playlist.v2+json',
76 'Content-Type': 'application/json',
78 }, self
.geo_verification_headers())
# Send a JSON-encoded body to `playlist_url` through self._download_json and
# return its result.
#   video_id     - passed to _download_json alongside the URL
#   playlist_url - endpoint to request
#   headers      - forwarded unchanged as the request headers
#   platform_tag - written into the payload under 'variantAvailability' -> 'platformTag'
#   featureset   - NOTE(review): its use is on payload lines not present in this view
#   fatal        - forwarded to _download_json (defaults to True)
80 def _call_api(self
, video_id
, playlist_url
, headers
, platform_tag
, featureset
, fatal
=True):
81 return self
._download
_json
(
# The request body is a dict serialised with json.dumps and encoded to bytes.
# Only a few of its keys are visible here.
82 playlist_url
, video_id
, data
=json
.dumps({
89 'manufacturer': 'Safari',
101 'variantAvailability': {
106 'platformTag': platform_tag
108 }).encode(), headers
=headers
, fatal
=fatal
)
# Collect subtitle tracks by requesting the playlist again with a
# subtitle-capable featureset.
#   variants         - mapping iterated as platform_tag -> list of featuresets
#   ios_playlist_url - playlist endpoint handed to self._call_api
#   headers          - request headers handed to self._call_api
#   *args, **kwargs  - accepted but not used in the visible code
# NOTE(review): the initialisation of `subtitles`, the loop header over `subs`,
# the skip statements and the return are on lines not present in this view.
110 def _get_subtitles(self
, video_id
, variants
, ios_playlist_url
, headers
, *args
, **kwargs
):
112 # Prefer last matching featureset
113 # See: https://github.com/yt-dlp/yt-dlp/issues/986
# Takes the first (platform_tag, featureset) pair, walking the variants in
# reverse order, whose featureset has 'outband-webvtt' at index 2.
# try_get guards featuresets that cannot be indexed at position 2.
114 platform_tag_subs
, featureset_subs
= next(
115 ((platform_tag
, featureset
)
116 for platform_tag
, featuresets
in reversed(list(variants
.items())) for featureset
in featuresets
117 if try_get(featureset
, lambda x
: x
[2]) == 'outband-webvtt'),
# Only query the API when both values are truthy. The call is non-fatal
# (fatal=False), so subtitle lookup is best-effort.
120 if platform_tag_subs
and featureset_subs
:
121 subs_playlist
= self
._call
_api
(
122 video_id
, ios_playlist_url
, headers
, platform_tag_subs
, featureset_subs
, fatal
=False)
# Reads Playlist -> Video -> Subtitles, falling back to an empty list when the
# path is missing or the value is not a list.
123 subs
= try_get(subs_playlist
, lambda x
: x
['Playlist']['Video']['Subtitles'], list) or []
# Each entry is checked to be a dict before its 'Href' is read.
125 if not isinstance(sub
, dict):
# 'Href' is passed through url_or_none before use.
127 href
= url_or_none(sub
.get('Href'))
# Every accepted URL is appended under the 'en' language key.
130 subtitles
.setdefault('en', []).append({'url': href}
)
# Extract one ITV Hub episode: download the page, read the data-video-*
# attributes of the element with id="video", pick an ('aes', 'hls') featureset,
# request the playlist, then assemble formats, thumbnails, subtitles and
# metadata.
# NOTE(review): several original lines (list initialisations, some conditions,
# the return statement) are not present in this view.
133 def _real_extract(self
, url
):
134 video_id
= self
._match
_id
(url
)
135 webpage
= self
._download
_webpage
(url
, video_id
)
# Attributes of the first tag in the page that carries id="video".
136 params
= extract_attributes(self
._search
_regex
(
137 r
'(?s)(<[^>]+id="video"[^>]*>)', webpage
, 'params'))
# The `data-video-variants` attribute is parsed as JSON. '{}' is substituted
# when the attribute is missing or is not a compat_str.
138 variants
= self
._parse
_json
(
139 try_get(params
, lambda x
: x
['data-video-variants'], compat_str
) or '{}',
140 video_id
, fatal
=False)
141 # Prefer last matching featureset
142 # See: https://github.com/yt-dlp/yt-dlp/issues/986
# Takes the first (platform_tag, featureset) pair, walking the variants in
# reverse order, whose first two featureset entries are exactly {'aes', 'hls'}.
143 platform_tag_video
, featureset_video
= next(
144 ((platform_tag
, featureset
)
145 for platform_tag
, featuresets
in reversed(list(variants
.items())) for featureset
in featuresets
146 if set(try_get(featureset
, lambda x
: x
[:2]) or []) == {'aes', 'hls'}
),
# Without a usable featureset there is nothing to download; raised as an
# expected error.
148 if not platform_tag_video
or not featureset_video
:
149 raise ExtractorError('No downloads available', expected
=True, video_id
=video_id
)
# Playlist endpoint: `data-video-playlist` when truthy, otherwise
# `data-video-id`, which is indexed directly and so must be present then.
151 ios_playlist_url
= params
.get('data-video-playlist') or params
['data-video-id']
# `data-video-hmac` is indexed directly, so the attribute is required.
152 headers
= self
._generate
_api
_headers
(params
['data-video-hmac'])
# Uses _call_api's default fatal=True.
153 ios_playlist
= self
._call
_api
(
154 video_id
, ios_playlist_url
, headers
, platform_tag_video
, featureset_video
)
# Playlist -> Video, or an empty dict when missing or not a dict.
156 video_data
= try_get(ios_playlist
, lambda x
: x
['Playlist']['Video'], dict) or {}
157 ios_base_url
= video_data
.get('Base')
159 for media_file
in (video_data
.get('MediaFiles') or []):
160 href
= media_file
.get('Href')
# The media Href is appended to the playlist's 'Base' value. Any condition
# guarding this concatenation is on lines not present in this view.
164 href
= ios_base_url
+ href
165 ext
= determine_ext(href
)
# HLS formats are collected non-fatally (fatal=False). How `ext` selects this
# branch is not visible here.
167 formats
.extend(self
._extract
_m
3u8_formats
(
168 href
, video_id
, 'mp4', entry_protocol
='m3u8_native',
169 m3u8_id
='hls', fatal
=False))
# Metadata from the page's JSON-LD; an empty dict when none is found.
174 info
= self
._search
_json
_ld
(webpage
, video_id
, default
={})
# The JSON-LD block is also parsed by hand ('{}' when JSON_LD_RE does not
# match) so a TVEpisode nested in a BreadcrumbList can be used.
176 json_ld
= self
._parse
_json
(self
._search
_regex
(
177 JSON_LD_RE
, webpage
, 'JSON-LD', '{}',
178 group
='json_ld'), video_id
, fatal
=False)
179 if json_ld
and json_ld
.get('@type') == 'BreadcrumbList':
# NOTE(review): the keys 'itemListElement:' and 'item:' end with a colon;
# schema.org names them 'itemListElement' and 'item'. Confirm this matches
# the site's markup and is not a typo.
180 for ile
in (json_ld
.get('itemListElement:') or []):
181 item
= ile
.get('item:') or {}
182 if item
.get('@type') == 'TVEpisode':
# An '@context' is set on the item before it is handed to self._json_ld.
# `info` is replaced by that result, or {} when it is falsy.
183 item
['@context'] = 'http://schema.org'
184 info
= self
._json
_ld
(item
, video_id
, fatal
=False) or {}
# Poster frame URL from `data-video-posterframe`; None-like when the attribute
# is missing or not a compat_str.
188 thumbnail_url
= try_get(params
, lambda x
: x
['data-video-posterframe'], compat_str
)
# The poster frame is used as a str.format template with fixed
# width/height/quality/blur/bg values.
191 'url': thumbnail_url
.format(width
=1920, height
=1080, quality
=100, blur
=0, bg
='false'),
# A second URL is rebuilt from base_url(...) plus url_basename(...) -
# presumably to drop any query part; confirm against those helpers.
195 'url': urljoin(base_url(thumbnail_url
), url_basename(thumbnail_url
)),
# thumbnail_url is (re)assigned from the og:image / twitter:image meta tags.
# The condition around this assignment is not present in this view.
199 thumbnail_url
= self
._html
_search
_meta
(['og:image', 'twitter:image'], webpage
, default
=None)
202 'url': thumbnail_url
,
# NOTE(review): a helper named for formats is applied to the thumbnails list
# here - confirm that is intended.
204 self
._remove
_duplicate
_formats
(thumbnails
)
# Visible entries of the result. The title comes from the og:title /
# twitter:title meta tags (no default is passed).
208 'title': self
._html
_search
_meta
(['og:title', 'twitter:title'], webpage
),
# Subtitles are requested via self.extract_subtitles with the same playlist URL
# and headers - presumably dispatching to _get_subtitles; confirm in the base class.
210 'subtitles': self
.extract_subtitles(video_id
, variants
, ios_playlist_url
, headers
),
211 'duration': parse_duration(video_data
.get('Duration')),
212 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage
)),
213 'thumbnails': thumbnails
# Extractor for itv.com /news and /btcc article pages (subclass of
# InfoExtractor); the visible _real_extract returns a playlist result.
# NOTE(review): extracted web view - leading numbers are the original file's
# line numbers and some original lines are absent.
217 class ITVBTCCIE(InfoExtractor
):
# Matches http(s)://[www.]itv.com/news/... or /btcc/...; the last path segment
# (up to any '/', '?', '#' or '&') is captured as `id`.
218 _VALID_URL
= r
'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
# Fragments of test-case entries follow; their container is on lines not
# present in this view.
220 'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
222 'id': 'btcc-2019-brands-hatch-gp-race-action',
223 'title': 'BTCC 2019: Brands Hatch GP race action',
225 'playlist_count': 12,
227 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
229 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
230 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32'
# Filled in _real_extract with (account_id, player_id, video_id), in that order.
234 BRIGHTCOVE_URL_TEMPLATE
= 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
# Build a playlist of the Brightcove videos embedded in an article page.
# Returns self.playlist_result(entries, playlist_id, title).
# NOTE(review): the assignment that receives the Next.js data, the
# initialisation of `entries` and the statement under the Brightcove check are
# on lines not present in this view.
236 def _real_extract(self
, url
):
237 playlist_id
= self
._match
_id
(url
)
239 webpage
= self
._download
_webpage
(url
, playlist_id
)
# The page's Next.js data is read at props -> pageProps -> article -> body ->
# content, with [] as the fallback. Presumably this is what `json_map` below
# holds - confirm.
242 self
._search
_nextjs
_data
(webpage
, playlist_id
),
243 lambda x
: x
['props']['pageProps']['article']['body']['content']) or []
246 for video
in json_map
:
# True when neither data['name'] nor data['type'] equals 'Brightcove'.
# video['data'] is indexed directly, so each item is expected to carry it.
247 if not any(video
['data'].get(attr
) == 'Brightcove' for attr
in ('name', 'type')):
# These three keys are indexed directly and so are required on Brightcove items.
249 video_id
= video
['data']['id']
250 account_id
= video
['data']['accountId']
251 player_id
= video
['data']['playerId']
# Each video becomes a url_result that points at the Brightcove player URL and
# is delegated to BrightcoveNewIE. Extra data is attached with smuggle_url.
252 entries
.append(self
.url_result(
253 smuggle_url(self
.BRIGHTCOVE_URL_TEMPLATE
% (account_id
, player_id
, video_id
), {
254 # ITV does not like some GB IP ranges, so here are some
255 # IP blocks it accepts
257 '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21'
261 ie
=BrightcoveNewIE
.ie_key(), video_id
=video_id
))
# The playlist title is optional: the og:title lookup is non-fatal.
263 title
= self
._og
_search
_title
(webpage
, fatal
=False)
265 return self
.playlist_result(entries
, playlist_id
, title
)