]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bbc.py
[ie/roosterteeth] Extract release date and timestamp (#9393)
[yt-dlp.git] / yt_dlp / extractor / bbc.py
CommitLineData
1418a043 1import functools
254e64a2 2import itertools
1418a043 3import json
f0228f56 4import re
ac668111 5import xml.etree.ElementTree
082c6c86 6
f13b1e7d 7from .common import InfoExtractor
3d2623a8 8from ..compat import compat_str, compat_urlparse
9from ..networking.exceptions import HTTPError
8683b4d8 10from ..utils import (
3721515b 11 ExtractorError,
1418a043 12 OnDemandPagedList,
97067db2 13 clean_html,
9fb64c04 14 dict_get,
9afa1770 15 float_or_none,
97067db2 16 get_element_by_class,
8683b4d8 17 int_or_none,
eda0e415 18 join_nonempty,
6d155707 19 js_to_json,
9afa1770
S
20 parse_duration,
21 parse_iso8601,
4dfbf869 22 parse_qs,
1bdae7d3 23 strip_or_none,
eda0e415 24 traverse_obj,
9fb64c04 25 try_get,
dab062fb 26 unescapeHTML,
1bdae7d3 27 unified_timestamp,
f0228f56 28 url_or_none,
97067db2
S
29 urlencode_postdata,
30 urljoin,
8683b4d8 31)
082c6c86 32
d12a1a47 33
f13b1e7d 34class BBCCoUkIE(InfoExtractor):
# Name and description under which this extractor is listed
IE_NAME = 'bbc.co.uk'
IE_DESC = 'BBC iPlayer'

# Programme identifier: one of p/b/m/l followed by 7 characters,
# or "w" followed by 7 to 14 characters
_ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
# Verbose-mode pattern; whitespace inside it is not significant
_VALID_URL = r'''(?x)
                https?://
                    (?:www\.)?bbc\.co\.uk/
                    (?:
                        programmes/(?!articles/)|
                        iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
                        music/(?:clips|audiovideo/popular)[/#]|
                        radio/player/|
                        events/[^/]+/play/[^/]+/
                    )
                    (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                ''' % _ID_REGEX
_EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']

# Sign-in endpoint and the netrc machine name used for stored credentials
_LOGIN_URL = 'https://account.bbc.com/signin'
_NETRC_MACHINE = 'bbc'

# Template filled with (media set, programme id)
_MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
# Media sets are queried in this order
_MEDIA_SETS = [
    # Provides HQ HLS streams with even better quality than the pc mediaset but
    # fails with geolocation in some cases when it's not even geo restricted at all
    # (e.g. http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with
    # selectionunavailable.
    'iptv-all',
    'pc',
]

# XML namespace of the legacy EMP playlist documents
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
65
2e3fd9ec
S
66 _TESTS = [
67 {
f2d0fc68 68 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
2e3fd9ec 69 'info_dict': {
f2d0fc68 70 'id': 'b039d07m',
b1ea6802 71 'ext': 'flv',
acc86c9a 72 'title': 'Kaleidoscope, Leonard Cohen',
c4914185 73 'description': 'The Canadian poet and songwriter reflects on his musical career.',
2e3fd9ec
S
74 },
75 'params': {
b1ea6802 76 # rtmp download
2e3fd9ec
S
77 'skip_download': True,
78 }
082c6c86 79 },
2e3fd9ec
S
80 {
81 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
82 'info_dict': {
83 'id': 'b00yng1d',
84 'ext': 'flv',
85 'title': 'The Man in Black: Series 3: The Printed Name',
86 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
87 'duration': 1800,
88 },
89 'params': {
90 # rtmp download
91 'skip_download': True,
c7f0177f
S
92 },
93 'skip': 'Episode is no longer available on BBC iPlayer Radio',
2e3fd9ec
S
94 },
95 {
96 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
97 'info_dict': {
98 'id': 'b00yng1d',
99 'ext': 'flv',
17968e44 100 'title': 'The Voice UK: Series 3: Blind Auditions 5',
611c1dd9 101 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
17968e44 102 'duration': 5100,
2e3fd9ec
S
103 },
104 'params': {
105 # rtmp download
106 'skip_download': True,
107 },
b1ea6802 108 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
c056efa2
S
109 },
110 {
111 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
112 'info_dict': {
113 'id': 'b03k3pb7',
114 'ext': 'flv',
115 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
116 'description': '2. Invasion',
117 'duration': 3600,
118 },
119 'params': {
120 # rtmp download
121 'skip_download': True,
122 },
b1ea6802 123 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
ae6986fb
S
124 }, {
125 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
126 'info_dict': {
127 'id': 'b04v209v',
128 'ext': 'flv',
129 'title': 'Pete Tong, The Essential New Tune Special',
130 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
131 'duration': 10800,
132 },
133 'params': {
134 # rtmp download
135 'skip_download': True,
a3ef0e1c
YCH
136 },
137 'skip': 'Episode is no longer available on BBC iPlayer Radio',
c7e67594 138 }, {
5aa535c3 139 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
c7e67594
S
140 'note': 'Audio',
141 'info_dict': {
5aa535c3 142 'id': 'p022h44j',
b1ea6802 143 'ext': 'flv',
5aa535c3
S
144 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
145 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
146 'duration': 227,
c7e67594
S
147 },
148 'params': {
b1ea6802 149 # rtmp download
c7e67594
S
150 'skip_download': True,
151 }
152 }, {
153 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
154 'note': 'Video',
155 'info_dict': {
156 'id': 'p025c103',
b1ea6802 157 'ext': 'flv',
c7e67594
S
158 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
159 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
160 'duration': 226,
161 },
162 'params': {
b1ea6802 163 # rtmp download
c7e67594
S
164 'skip_download': True,
165 }
e68ae99a
S
166 }, {
167 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
168 'info_dict': {
169 'id': 'p02n76xf',
170 'ext': 'flv',
171 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
172 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
173 'duration': 3540,
174 },
175 'params': {
176 # rtmp download
177 'skip_download': True,
178 },
b1ea6802 179 'skip': 'geolocation',
25fa8d66
YCH
180 }, {
181 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
182 'info_dict': {
183 'id': 'b05zmgw1',
184 'ext': 'flv',
185 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
186 'title': 'Royal Academy Summer Exhibition',
187 'duration': 3540,
188 },
189 'params': {
190 # rtmp download
191 'skip_download': True,
192 },
b1ea6802 193 'skip': 'geolocation',
54914380
S
194 }, {
195 # iptv-all mediaset fails with geolocation however there is no geo restriction
196 # for this programme at all
5aa535c3 197 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
54914380 198 'info_dict': {
5aa535c3 199 'id': 'b06rkms3',
54914380 200 'ext': 'flv',
5aa535c3
S
201 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
202 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
54914380
S
203 },
204 'params': {
205 # rtmp download
206 'skip_download': True,
207 },
b1ea6802 208 'skip': 'Now it\'s really geo-restricted',
1ac6e794 209 }, {
067aa17e 210 # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
1ac6e794
S
211 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
212 'info_dict': {
213 'id': 'p028bfkj',
b1ea6802 214 'ext': 'flv',
1ac6e794
S
215 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
216 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
217 },
218 'params': {
b1ea6802 219 # rtmp download
1ac6e794
S
220 'skip_download': True,
221 },
31763975
S
222 }, {
223 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
224 'only_matching': True,
c7e67594
S
225 }, {
226 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
227 'only_matching': True,
0692ef86
S
228 }, {
229 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
230 'only_matching': True,
f20a11ed
S
231 }, {
232 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
233 'only_matching': True,
72d256c4
S
234 }, {
235 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
236 'only_matching': True,
53647dfd
S
237 }, {
238 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
239 'only_matching': True,
6f356cbb
S
240 }, {
241 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
242 'only_matching': True,
243 }, {
244 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
245 'only_matching': True,
72d256c4 246 }]
2e3fd9ec 247
def _perform_login(self, username, password):
    """Sign in to the BBC account service with the given credentials.

    Downloads the sign-in page, submits its hidden form fields together
    with *username* and *password*, and raises ExtractorError when the
    URL of the response still contains the sign-in URL.
    """
    signin_page = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading signin page')

    # Start from the hidden inputs of the page, then add the credentials
    form_data = self._hidden_inputs(signin_page)
    form_data.update({
        'username': username,
        'password': password,
    })

    # The form action may be relative; fall back to the sign-in URL itself
    form_action = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
        'post url', default=self._LOGIN_URL, group='url')
    post_url = urljoin(self._LOGIN_URL, form_action)

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(form_data),
        headers={'Referer': self._LOGIN_URL})

    if self._LOGIN_URL not in urlh.url:
        return

    # Still on the sign-in page: report the message shown in the form, if any
    error = clean_html(get_element_by_class('form-message', response))
    if error:
        raise ExtractorError(
            'Unable to login: %s' % error, expected=True)
    raise ExtractorError('Unable to log in')
273
d12a1a47
S
class MediaSelectionError(Exception):
    """Error reported by the media selector.

    ``id`` holds the value the exception was constructed with.
    """

    def __init__(self, id):
        self.id = id
277
2e3fd9ec
S
278 def _extract_asx_playlist(self, connection, programme_id):
279 asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
280 return [ref.get('href') for ref in asx.findall('./Entry/ref')]
281
2e3fd9ec 282 def _extract_items(self, playlist):
e6174ee9
S
283 return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
284
2e3fd9ec 285 def _extract_medias(self, media_selection):
29f7c58a 286 error = media_selection.get('result')
287 if error:
288 raise BBCCoUkIE.MediaSelectionError(error)
289 return media_selection.get('media') or []
2e3fd9ec
S
290
291 def _extract_connections(self, media):
29f7c58a 292 return media.get('connection') or []
2e3fd9ec 293
def _get_subtitles(self, media, programme_id):
    """Build the subtitles dict for a captions *media* entry.

    The connections are tried in order. The first one whose ``href`` is a
    URL and whose captions document downloads as an XML element is
    returned as the single English TTML subtitle; the remaining
    connections are not tried. Returns an empty dict when none qualifies.
    """
    for connection in self._extract_connections(media):
        captions_url = url_or_none(connection.get('href'))
        if not captions_url:
            continue
        captions_doc = self._download_xml(
            captions_url, programme_id, 'Downloading captions', fatal=False)
        # The download is non-fatal; only a parsed XML element counts as success
        if isinstance(captions_doc, xml.etree.ElementTree.Element):
            return {
                'en': [
                    {
                        'url': connection.get('href'),
                        'ext': 'ttml',
                    },
                ],
            }
    return {}
082c6c86 312
d12a1a47
S
def _raise_extractor_error(self, media_selection_error):
    """Raise an expected ExtractorError naming this extractor and the id of *media_selection_error*."""
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    raise ExtractorError(message, expected=True)
317
c056efa2 318 def _download_media_selector(self, programme_id):
d12a1a47 319 last_exception = None
c919b68f 320 formats, subtitles = [], {}
29f7c58a 321 for media_set in self._MEDIA_SETS:
d12a1a47 322 try:
c919b68f 323 fmts, subs = self._download_media_selector_url(
29f7c58a 324 self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
c919b68f 325 formats.extend(fmts)
326 if subs:
327 self._merge_subtitles(subs, target=subtitles)
d12a1a47 328 except BBCCoUkIE.MediaSelectionError as e:
d781e293 329 if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
d12a1a47
S
330 last_exception = e
331 continue
332 self._raise_extractor_error(e)
c919b68f 333 if last_exception:
334 if formats or subtitles:
335 self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
336 else:
337 self._raise_extractor_error(last_exception)
338 return formats, subtitles
9afa1770
S
339
340 def _download_media_selector_url(self, url, programme_id=None):
29f7c58a 341 media_selection = self._download_json(
342 url, programme_id, 'Downloading media selection JSON',
9283d4ea 343 expected_status=(403, 404))
9afa1770 344 return self._process_media_selector(media_selection, programme_id)
082c6c86 345
def _process_media_selector(self, media_selection, programme_id):
    """Turn a media selection into formats and subtitles.

    Returns a ``(formats, subtitles)`` tuple. ``subtitles`` stays None
    unless the selection contains a media entry of kind 'captions'.
    Only 'video' and 'audio' media produce formats; a connection whose
    ``href`` was already seen is skipped.
    """
    formats = []
    subtitles = None
    seen_hrefs = []

    for media in self._extract_medias(media_selection):
        kind = media.get('kind')
        if kind == 'captions':
            subtitles = self.extract_subtitles(media, programme_id)
            continue
        if kind not in ('video', 'audio'):
            continue

        bitrate = int_or_none(media.get('bitrate'))
        encoding = media.get('encoding')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        file_size = int_or_none(media.get('media_file_size'))

        for connection in self._extract_connections(media):
            href = connection.get('href')
            if href in seen_hrefs:
                continue
            if href:
                seen_hrefs.append(href)

            conn_kind = connection.get('kind')
            protocol = connection.get('protocol')
            supplier = connection.get('supplier')
            transfer_format = connection.get('transferFormat')
            format_id = supplier or conn_kind or protocol

            # ASX playlist: one format per referenced URL
            if supplier == 'asx':
                asx_refs = self._extract_asx_playlist(connection, programme_id)
                formats.extend(
                    {
                        'url': ref,
                        'format_id': 'ref%s_%s' % (i, format_id),
                    }
                    for i, ref in enumerate(asx_refs))
                continue

            if transfer_format == 'dash':
                formats.extend(self._extract_mpd_formats(
                    href, programme_id, mpd_id=format_id, fatal=False))
                continue

            if transfer_format == 'hls':
                # TODO: let expected_status be passed into _extract_xxx_formats() instead
                try:
                    hls_formats = self._extract_m3u8_formats(
                        href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False)
                except ExtractorError as e:
                    underlying = e.exc_info[1]
                    # Only HTTP 403/404 are tolerated; anything else propagates
                    if isinstance(underlying, HTTPError) and underlying.status in (403, 404):
                        hls_formats = []
                    else:
                        raise
                formats.extend(hls_formats)
                continue

            if transfer_format == 'hds':
                formats.extend(self._extract_f4m_formats(
                    href, programme_id, f4m_id=format_id, fatal=False))
                continue

            # Remaining connections are plain HTTP(S) links or RTMP streams
            if not supplier and bitrate:
                format_id += '-%d' % bitrate

            if kind == 'video':
                codec_fields = {
                    'width': width,
                    'height': height,
                    'tbr': bitrate,
                    'vcodec': encoding,
                }
            else:
                codec_fields = {
                    'abr': bitrate,
                    'acodec': encoding,
                    'vcodec': 'none',
                }

            if protocol in ('http', 'https'):
                # Direct link
                transport_fields = {
                    'url': href,
                }
            elif protocol == 'rtmp':
                application = connection.get('application', 'ondemand')
                auth_string = connection.get('authString')
                identifier = connection.get('identifier')
                server = connection.get('server')
                transport_fields = {
                    'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                    'play_path': identifier,
                    'app': '%s?%s' % (application, auth_string),
                    'page_url': 'http://www.bbc.co.uk',
                    'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                    'rtmp_live': False,
                    'ext': 'flv',
                }
            else:
                # Unsupported protocol: no format for this connection
                continue

            formats.append({
                'format_id': format_id,
                'filesize': file_size,
                **codec_fields,
                **transport_fields,
            })

    return formats, subtitles
2e3fd9ec 440
ae6986fb
S
def _download_playlist(self, playlist_id):
    """Extract programme info from the JSON playlist of *playlist_id*.

    Returns a ``(programme_id, title, description, duration, formats,
    subtitles)`` tuple. Formats and subtitles of all versions are
    accumulated; the other values are those of the last programme item
    processed. If the playlist JSON download fails with HTTP 404, the
    legacy XML playlist is used instead.

    Raises ExtractorError when the playlist contains no item of kind
    'programme' or 'radioProgramme'. Previously this case crashed with
    UnboundLocalError at the return statement.
    """
    try:
        playlist = self._download_json(
            'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
            playlist_id, 'Downloading playlist JSON')
        formats = []
        subtitles = {}
        # Bound up front so that a playlist without programme items is
        # detected below instead of hitting unbound locals
        programme_id = title = description = duration = None
        found_programme = False

        # "or []" also covers an explicit null value in the JSON
        for version in playlist.get('allAvailableVersions') or []:
            smp_config = version['smpConfig']
            title = smp_config['title']
            description = smp_config['summary']
            for item in smp_config['items']:
                kind = item['kind']
                if kind not in ('programme', 'radioProgramme'):
                    continue
                found_programme = True
                programme_id = item.get('vpid')
                duration = int_or_none(item.get('duration'))
                version_formats, version_subtitles = self._download_media_selector(programme_id)
                types = version['types']
                for f in version_formats:
                    f['format_note'] = ', '.join(types)
                    # Audio described versions are given a lower language preference
                    if any('AudioDescribed' in x for x in types):
                        f['language_preference'] = -10
                formats += version_formats
                for tag, subformats in (version_subtitles or {}).items():
                    subtitles.setdefault(tag, []).extend(subformats)

        if not found_programme:
            # No HTTP cause is attached, so the handler below re-raises this
            raise ExtractorError('No programme found in playlist %s' % playlist_id)

        return programme_id, title, description, duration, formats, subtitles
    except ExtractorError as ee:
        if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
            raise

    # fallback to legacy playlist
    return self._process_legacy_playlist(playlist_id)
476
477 def _process_legacy_playlist_url(self, url, display_id):
478 playlist = self._download_legacy_playlist_url(url, display_id)
479 return self._extract_from_legacy_playlist(playlist, display_id)
480
481 def _process_legacy_playlist(self, playlist_id):
482 return self._process_legacy_playlist_url(
483 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
484
485 def _download_legacy_playlist_url(self, url, playlist_id=None):
486 return self._download_xml(
487 url, playlist_id, 'Downloading legacy playlist XML')
ae6986fb 488
def _extract_from_legacy_playlist(self, playlist, playlist_id):
    """Extract programme info from a legacy EMP playlist XML element.

    Returns a ``(programme_id, title, description, duration, formats,
    subtitles)`` tuple for the last item of kind 'programme' or
    'radioProgramme' in the playlist.

    Raises an expected ExtractorError when the playlist has a ``noItems``
    element, and an ExtractorError when it has no programme item at all.
    Previously the latter case crashed with UnboundLocalError at the
    return statement.
    """
    ns = self._EMP_PLAYLIST_NS

    no_items = playlist.find('./{%s}noItems' % ns)
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % playlist_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % playlist_id
        elif reason == 'noMedia':
            msg = 'Episode %s is not currently available' % playlist_id
        else:
            msg = 'Episode %s is not available: %s' % (playlist_id, reason)
        raise ExtractorError(msg, expected=True)

    def get_from_attributes(element):
        # First of the 'identifier'/'group' attributes that looks like a programme id
        for attr in ('identifier', 'group'):
            value = element.get(attr)
            if value and re.match(r'^[pb][\da-z]{7}$', value):
                return value
        return None

    def get_programme_id(item):
        # The mediator is looked at first because it is the only lookup
        # whose result the previous code used. The item's own attributes
        # were also looked up, but that result was discarded; they now
        # serve as the fallback.
        mediator = item.find('./{%s}mediator' % ns)
        if mediator is not None:
            programme_id = get_from_attributes(mediator)
            if programme_id:
                return programme_id
        return get_from_attributes(item)

    result = None
    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind not in ('programme', 'radioProgramme'):
            continue
        title = playlist.find('./{%s}title' % ns).text
        description_el = playlist.find('./{%s}summary' % ns)
        description = description_el.text if description_el is not None else None

        programme_id = get_programme_id(item)
        duration = int_or_none(item.get('duration'))

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
        else:
            # No usable id: process the item itself and report the playlist id
            formats, subtitles = self._process_media_selector(item, playlist_id)
            programme_id = playlist_id

        result = (programme_id, title, description, duration, formats, subtitles)

    if result is None:
        raise ExtractorError('No programme found in legacy playlist %s' % playlist_id)
    return result
532
c056efa2
S
def _real_extract(self, url):
    """Extract a single programme from the page at *url*.

    The programme id is taken from the ``mediator.bind(...)`` player JSON
    or from a ``"vpid"`` value in the page. When neither yields one, all
    metadata and formats come from the playlist of the id in the URL.
    Raises an expected ExtractorError when the page shows a player
    message.
    """
    group_id = self._match_id(url)
    webpage = self._download_webpage(url, group_id, 'Downloading video page')

    page_error = self._search_regex(
        r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
        webpage, 'error', default=None)
    if page_error:
        raise ExtractorError(page_error, expected=True)

    programme_id = duration = None

    player_json = self._search_regex(
        r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
        webpage, 'player', default=None)
    if player_json:
        player = self._parse_json(player_json, group_id).get('player', {})
        duration = int_or_none(player.get('duration'))
        programme_id = player.get('vpid')

    if not programme_id:
        programme_id = self._search_regex(
            r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)

    if not programme_id:
        # Nothing usable in the page: everything comes from the playlist
        programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
    else:
        formats, subtitles = self._download_media_selector(programme_id)
        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
            (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
             r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
        description = self._search_regex(
            (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
             r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
            webpage, 'description', default=None)
        if not description:
            description = self._html_search_meta('description', webpage)

    return {
        'id': programme_id,
        'title': title,
        'description': description,
        'thumbnail': self._og_search_thumbnail(webpage, default=None),
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }
10273d6e 583
584
6368e2e6 585class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
9afa1770
S
586 IE_NAME = 'bbc'
587 IE_DESC = 'BBC'
ed13a772 588 _VALID_URL = r'''(?x)
589 https?://(?:www\.)?(?:
590 bbc\.(?:com|co\.uk)|
591 bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
592 bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
593 )/(?:[^/]+/)+(?P<id>[^/#?]+)'''
10273d6e 594
29f7c58a 595 _MEDIA_SETS = [
29f7c58a 596 'pc',
2d997542 597 'mobile-tablet-main',
d12a1a47 598 ]
10273d6e 599
600 _TESTS = [{
6a747190 601 # article with multiple videos embedded with data-playable containing vpids
10273d6e 602 'url': 'http://www.bbc.com/news/world-europe-32668511',
603 'info_dict': {
604 'id': 'world-europe-32668511',
acc86c9a 605 'title': 'Russia stages massive WW2 parade',
9afa1770 606 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
10273d6e 607 },
608 'playlist_count': 2,
a3bfddfa 609 }, {
6a747190 610 # article with multiple videos embedded with data-playable (more videos)
10273d6e 611 'url': 'http://www.bbc.com/news/business-28299555',
612 'info_dict': {
613 'id': 'business-28299555',
614 'title': 'Farnborough Airshow: Video highlights',
9afa1770 615 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
10273d6e 616 },
617 'playlist_count': 9,
9afa1770 618 'skip': 'Save time',
88ed52ae
S
619 }, {
620 # article with multiple videos embedded with `new SMP()`
6a747190 621 # broken
88ed52ae
S
622 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
623 'info_dict': {
624 'id': '3662a707-0af9-3149-963f-47bea720b460',
b7d7674f 625 'title': 'BUGGER',
88ed52ae
S
626 },
627 'playlist_count': 18,
a3bfddfa 628 }, {
6a747190 629 # single video embedded with data-playable containing vpid
10273d6e 630 'url': 'http://www.bbc.com/news/world-europe-32041533',
10273d6e 631 'info_dict': {
632 'id': 'p02mprgb',
55ebae26 633 'ext': 'mp4',
10273d6e 634 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
55ebae26 635 'description': 'md5:2868290467291b37feda7863f7a83f54',
10273d6e 636 'duration': 47,
9afa1770 637 'timestamp': 1427219242,
da92eeae 638 'upload_date': '20150324',
10273d6e 639 },
640 'params': {
9afa1770 641 # rtmp download
10273d6e 642 'skip_download': True,
643 }
a3bfddfa 644 }, {
6a747190
S
645 # article with single video embedded with data-playable containing XML playlist
646 # with direct video links as progressiveDownloadUrl (for now these are extracted)
647 # and playlist with f4m and m3u8 as streamingUrl
de939d89 648 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
de939d89 649 'info_dict': {
9afa1770 650 'id': '150615_telabyad_kentin_cogu',
de939d89 651 'ext': 'mp4',
ad152e2d 652 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
05087d1b 653 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
9afa1770 654 'timestamp': 1434397334,
da92eeae 655 'upload_date': '20150615',
de939d89 656 },
657 'params': {
658 'skip_download': True,
659 }
c936d8cc 660 }, {
6a747190 661 # single video embedded with data-playable containing XML playlists (regional section)
de939d89 662 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 663 'info_dict': {
9afa1770 664 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 665 'ext': 'mp4',
9afa1770 666 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
05087d1b 667 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
9afa1770 668 'timestamp': 1434713142,
da92eeae 669 'upload_date': '20150619',
de939d89 670 },
671 'params': {
672 'skip_download': True,
673 }
a346b1ff
S
674 }, {
675 # single video from video playlist embedded with vxp-playlist-data JSON
676 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
677 'info_dict': {
678 'id': 'p02w6qjc',
55ebae26 679 'ext': 'mp4',
a346b1ff
S
680 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
681 'duration': 56,
0bc4ee60 682 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
a346b1ff
S
683 },
684 'params': {
685 'skip_download': True,
686 }
9afa1770
S
687 }, {
688 # single video story with digitalData
689 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
690 'info_dict': {
691 'id': 'p02q6gc4',
692 'ext': 'flv',
693 'title': 'Sri Lanka’s spicy secret',
694 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
695 'timestamp': 1437674293,
696 'upload_date': '20150723',
697 },
698 'params': {
699 # rtmp download
700 'skip_download': True,
701 }
702 }, {
703 # single video story without digitalData
704 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
705 'info_dict': {
706 'id': 'p018zqqg',
55ebae26 707 'ext': 'mp4',
9afa1770
S
708 'title': 'Hyundai Santa Fe Sport: Rock star',
709 'description': 'md5:b042a26142c4154a6e472933cf20793d',
ae8bdfd1
S
710 'timestamp': 1415867444,
711 'upload_date': '20141113',
9afa1770
S
712 },
713 'params': {
714 # rtmp download
715 'skip_download': True,
716 }
9fb64c04
S
717 }, {
718 # single video embedded with Morph
719 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
720 'info_dict': {
721 'id': 'p041vhd0',
722 'ext': 'mp4',
723 'title': "Nigeria v Japan - Men's First Round",
724 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
725 'duration': 7980,
726 'uploader': 'BBC Sport',
727 'uploader_id': 'bbc_sport',
728 },
729 'params': {
730 # m3u8 download
731 'skip_download': True,
9fb64c04
S
732 },
733 'skip': 'Georestricted to UK',
9afa1770 734 }, {
6a747190 735 # single video with playlist.sxml URL in playlist param
9afa1770
S
736 'url': 'http://www.bbc.com/sport/0/football/33653409',
737 'info_dict': {
738 'id': 'p02xycnp',
55ebae26 739 'ext': 'mp4',
9afa1770 740 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
7033bc1a 741 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
9afa1770
S
742 'duration': 140,
743 },
744 'params': {
745 # rtmp download
746 'skip_download': True,
747 }
b5d48cb1 748 }, {
6a747190 749 # article with multiple videos embedded with playlist.sxml in playlist param
b5d48cb1
S
750 'url': 'http://www.bbc.com/sport/0/football/34475836',
751 'info_dict': {
752 'id': '34475836',
450b233c 753 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
8c65e4a5 754 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
b5d48cb1
S
755 },
756 'playlist_count': 3,
450b233c
S
757 }, {
758 # school report article with single video
759 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
760 'info_dict': {
761 'id': '35744779',
762 'title': 'School which breaks down barriers in Jerusalem',
763 },
764 'playlist_count': 1,
9afa1770
S
765 }, {
766 # single video with playlist URL from weather section
767 'url': 'http://www.bbc.com/weather/features/33601775',
768 'only_matching': True,
769 }, {
770 # custom redirection to www.bbc.com
1bdae7d3 771 # also, video with window.__INITIAL_DATA__
9afa1770 772 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
1bdae7d3 773 'info_dict': {
774 'id': 'p02xzws1',
775 'ext': 'mp4',
776 'title': "Pluto may have 'nitrogen glaciers'",
777 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
778 'thumbnail': r're:https?://.+/.+\.jpg',
779 'timestamp': 1437785037,
780 'upload_date': '20150725',
781 },
50e93e03 782 }, {
783 # video with window.__INITIAL_DATA__ and value as JSON string
784 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
785 'info_dict': {
786 'id': 'p0b71qth',
787 'ext': 'mp4',
788 'title': 'Why France is making this woman a national hero',
789 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
790 'thumbnail': r're:https?://.+/.+\.jpg',
791 'timestamp': 1638230731,
792 'upload_date': '20211130',
793 },
a1cf3e38
S
794 }, {
795 # single video article embedded with data-media-vpid
796 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
797 'only_matching': True,
6d155707 798 }, {
50e93e03 799 # bbcthreeConfig
6d155707
S
800 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
801 'info_dict': {
802 'id': 'p06556y7',
803 'ext': 'mp4',
50e93e03 804 'title': 'Things Not To Say to people that live on council estates',
805 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
806 'duration': 360,
807 'thumbnail': r're:https?://.+/.+\.jpg',
6d155707 808 },
b96b4be4
RA
809 }, {
810 # window.__PRELOADED_STATE__
811 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
812 'info_dict': {
813 'id': 'b0b9z4vz',
814 'ext': 'mp4',
815 'title': 'Prom 6: An American in Paris and Turangalila',
816 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
817 'uploader': 'Radio 3',
818 'uploader_id': 'bbc_radio_three',
819 },
373941c5
S
820 }, {
821 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
822 'info_dict': {
823 'id': 'p06w9tws',
824 'ext': 'mp4',
825 'title': 'md5:2fabf12a726603193a2879a055f72514',
826 'description': 'Learn English words and phrases from this story',
827 },
828 'add_ie': [BBCCoUkIE.ie_key()],
3721515b 829 }, {
830 # BBC Reel
831 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
832 'info_dict': {
833 'id': 'p07c6sb9',
834 'ext': 'mp4',
835 'title': 'How positive thinking is harming your happiness',
836 'alt_title': 'The downsides of positive thinking',
837 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
838 'duration': 235,
839 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
840 'upload_date': '20190604',
841 'categories': ['Psychology'],
842 },
eda0e415 843 }, {
844 # BBC Sounds
845 'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
846 'info_dict': {
847 'id': 'm001q789',
848 'ext': 'mp4',
849 'title': 'The Night Tracks Mix - Music for the darkling hour',
850 'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
851 'chapters': 'count:8',
852 'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
853 'uploader': 'Radio 3',
854 'duration': 1800,
855 'uploader_id': 'bbc_radio_three',
856 },
ed13a772 857 }, { # onion routes
858 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
859 'only_matching': True,
860 }, {
861 'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
862 'only_matching': True,
10273d6e 863 }]
864
9afa1770
S
@classmethod
def suitable(cls, url):
    """Return whether this generic BBC extractor should handle *url*.

    URLs claimed by one of the more specific BBC extractors listed below are
    rejected so that those extractors get to handle them instead.
    """
    more_specific_ies = (
        BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE,
        BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
    if any(ie.suitable(url) for ie in more_specific_ies):
        return False
    return super(BBCIE, cls).suitable(url)
9afa1770
S
870
def _extract_from_media_meta(self, media_meta, video_id):
    """Build a ``(formats, subtitles)`` pair from one media metadata dict.

    Three sources are tried in order: direct ``sourceFiles`` links, the media
    selector keyed by ``externalId``, and a legacy playlist at ``href``.
    ``([], [])`` is returned when none of them is present.
    """
    # Direct links to media in media metadata (e.g.
    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
    # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
    source_files = media_meta.get('sourceFiles')
    if source_files:
        direct_formats = []
        for format_id, f in source_files.items():
            # Entries without a URL cannot be downloaded; leave them out
            if not f.get('url'):
                continue
            direct_formats.append({
                'url': f['url'],
                'format_id': format_id,
                'ext': f.get('encoding'),
                'tbr': float_or_none(f.get('bitrate'), 1000),
                'filesize': int_or_none(f.get('filesize')),
            })
        return direct_formats, []

    programme_id = media_meta.get('externalId')
    if programme_id:
        return self._download_media_selector(programme_id)

    # Process playlist.sxml as legacy playlist
    href = media_meta.get('href')
    if not href:
        return [], []

    playlist = self._download_legacy_playlist_url(href)
    _id, _title, _description, _duration, formats, subtitles = \
        self._extract_from_legacy_playlist(playlist, video_id)
    return formats, subtitles
897
baf39a1a
S
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
    """Turn a legacy playlist.sxml URL into an info dict.

    The six values returned by ``_process_legacy_playlist_url`` are mapped
    onto info-dict keys; *timestamp* is passed through unchanged.
    """
    legacy_fields = self._process_legacy_playlist_url(url, playlist_id)
    programme_id, title, description, duration, formats, subtitles = legacy_fields
    info = {
        'id': programme_id,
        'title': title,
        'description': description,
        'duration': duration,
        'timestamp': timestamp,
        'formats': formats,
        'subtitles': subtitles,
    }
    return info
910
def _real_extract(self, url):
    """Extract a single video or a playlist from a generic BBC page.

    The page is downloaded once and then probed for a series of known embed
    formats, in this order; the first one that matches produces the result:

    1. playlist.sxml references and ``data-playable`` attributes
    2. a ``data-pid`` video group (delegated to BBCCoUkIE)
    3. a single vpid (``data-*-vpid``, ``externalIdentifier``, ``videoId``)
    4. BBC Reel ``initial-data`` script
    5. ``Morph.setPayload`` embeds
    6. ``window.__PRELOADED_STATE__``
    7. ``bbcthreeConfig``
    8. ``window.__INITIAL_DATA__``
    9. ``new SMP(...)`` / ``setPlaylist(...)`` embed URLs
    10. ``data-media-meta``, ``mediaAssetPage.init`` and finally the
        ``vxp-playlist-data`` script

    Returns an info dict, a ``url_result`` or a ``playlist_result``.
    """
    playlist_id = self._match_id(url)

    webpage = self._download_webpage(url, playlist_id)

    json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
    timestamp = json_ld_info.get('timestamp')

    playlist_title = json_ld_info.get('title') or re.sub(
        r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None

    playlist_description = json_ld_info.get(
        'description') or self._og_search_description(webpage, default=None)

    if not timestamp:
        timestamp = parse_iso8601(self._search_regex(
            [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
             r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
             r'"datePublished":\s*"([^"]+)'],
            webpage, 'date', default=None))

    entries = []

    # article with multiple videos embedded with playlist.sxml (e.g.
    # http://www.bbc.com/sport/0/football/34475836)
    playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
    playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
    if playlists:
        entries = [
            self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
            for playlist_url in playlists]

    # news article with multiple videos embedded with data-playable
    data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
    if data_playables:
        for _, data_playable_json in data_playables:
            data_playable = self._parse_json(
                unescapeHTML(data_playable_json), playlist_id, fatal=False)
            if not data_playable:
                continue
            settings = data_playable.get('settings', {})
            if settings:
                # data-playable with video vpid in settings.playlistObject.items (e.g.
                # http://www.bbc.com/news/world-us-canada-34473351)
                playlist_object = settings.get('playlistObject', {})
                if playlist_object:
                    items = playlist_object.get('items')
                    if items and isinstance(items, list):
                        title = playlist_object['title']
                        description = playlist_object.get('summary')
                        duration = int_or_none(items[0].get('duration'))
                        programme_id = items[0].get('vpid')
                        formats, subtitles = self._download_media_selector(programme_id)
                        entries.append({
                            'id': programme_id,
                            'title': title,
                            'description': description,
                            'timestamp': timestamp,
                            'duration': duration,
                            'formats': formats,
                            'subtitles': subtitles,
                        })
            else:
                # data-playable without vpid but with a playlist.sxml URLs
                # in otherSettings.playlist (e.g.
                # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
                playlist = data_playable.get('otherSettings', {}).get('playlist', {})
                if playlist:
                    entry = None
                    for key in ('streaming', 'progressiveDownload'):
                        playlist_url = playlist.get('%sUrl' % key)
                        if not playlist_url:
                            continue
                        try:
                            info = self._extract_from_playlist_sxml(
                                playlist_url, playlist_id, timestamp)
                            if not entry:
                                entry = info
                            else:
                                entry['title'] = info['title']
                                entry['formats'].extend(info['formats'])
                        except ExtractorError as e:
                            # Some playlist URL may fail with 500, at the same time
                            # the other one may work fine (e.g.
                            # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
                            if isinstance(e.cause, HTTPError) and e.cause.status == 500:
                                continue
                            raise
                    if entry:
                        entries.append(entry)

    if entries:
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
    group_id = self._search_regex(
        r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
        webpage, 'group id', default=None)
    if group_id:
        return self.url_result(
            'https://www.bbc.co.uk/programmes/%s' % group_id,
            ie=BBCCoUkIE.ie_key())

    # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
    programme_id = self._search_regex(
        [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
         r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
         r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
        webpage, 'vpid', default=None)

    if programme_id:
        formats, subtitles = self._download_media_selector(programme_id)
        # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
        # The parse is non-fatal, so fall back to an empty dict rather than
        # calling .get() on None when the blob is not valid JSON
        digital_data = self._parse_json(
            self._search_regex(
                r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
            programme_id, fatal=False) or {}
        page_info = digital_data.get('page', {}).get('pageInfo', {})
        title = page_info.get('pageName') or self._og_search_title(webpage)
        description = page_info.get('description') or self._og_search_description(webpage)
        timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        }

    # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
    initial_data = self._parse_json(self._html_search_regex(
        r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
        webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
    if initial_data:
        init_data = try_get(
            initial_data, lambda x: x['initData']['items'][0], dict) or {}
        smp_data = init_data.get('smpData') or {}
        clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
        version_id = clip_data.get('versionID')
        if version_id:
            title = smp_data['title']
            formats, subtitles = self._download_media_selector(version_id)
            image_url = smp_data.get('holdingImageURL')
            display_date = init_data.get('displayDate')
            topic_title = init_data.get('topicTitle')

            return {
                'id': version_id,
                'title': title,
                'formats': formats,
                'alt_title': init_data.get('shortTitle'),
                'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
                'description': smp_data.get('summary') or init_data.get('shortSummary'),
                'upload_date': display_date.replace('-', '') if display_date else None,
                'subtitles': subtitles,
                'duration': int_or_none(clip_data.get('duration')),
                'categories': [topic_title] if topic_title else None,
            }

    # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
    # Several setPayload calls may be present but the video
    # seems to be always related to the first one
    morph_payload = self._parse_json(
        self._search_regex(
            r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
            webpage, 'morph payload', default='{}'),
        playlist_id, fatal=False)
    if morph_payload:
        components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
        for component in components:
            if not isinstance(component, dict):
                continue
            lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
            if not lead_media:
                continue
            identifiers = lead_media.get('identifiers')
            if not identifiers or not isinstance(identifiers, dict):
                continue
            programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
            if not programme_id:
                continue
            title = lead_media.get('title') or self._og_search_title(webpage)
            formats, subtitles = self._download_media_selector(programme_id)
            description = lead_media.get('summary')
            uploader = lead_media.get('masterBrand')
            uploader_id = lead_media.get('mid')
            duration = None
            duration_d = lead_media.get('duration')
            if isinstance(duration_d, dict):
                duration = parse_duration(dict_get(
                    duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'duration': duration,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'formats': formats,
                'subtitles': subtitles,
            }

    preload_state = self._parse_json(self._search_regex(
        r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
        'preload state', default='{}'), playlist_id, fatal=False)
    if preload_state:
        current_programme = preload_state.get('programmes', {}).get('current') or {}
        programme_id = current_programme.get('id')
        if current_programme and programme_id and current_programme.get('type') == 'playable_item':
            title = current_programme.get('titles', {}).get('tertiary') or playlist_title
            formats, subtitles = self._download_media_selector(programme_id)
            synopses = current_programme.get('synopses') or {}
            network = current_programme.get('network') or {}
            duration = int_or_none(
                current_programme.get('duration', {}).get('value'))
            thumbnail = None
            image_url = current_programme.get('image_url')
            if image_url:
                thumbnail = image_url.replace('{recipe}', 'raw')
            return {
                'id': programme_id,
                'title': title,
                'description': dict_get(synopses, ('long', 'medium', 'short')),
                'thumbnail': thumbnail,
                'duration': duration,
                'uploader': network.get('short_title'),
                'uploader_id': network.get('id'),
                'formats': formats,
                'subtitles': subtitles,
                'chapters': traverse_obj(preload_state, (
                    'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
                        'title': ('titles', {lambda x: join_nonempty(
                            'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
                        'start_time': ('offset', 'start', {float_or_none}),
                        'end_time': ('offset', 'end', {float_or_none}),
                    })) or None,
            }

    bbc3_config = self._parse_json(
        self._search_regex(
            r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
            'bbcthree config', default='{}'),
        playlist_id, transform_source=js_to_json, fatal=False) or {}
    payload = bbc3_config.get('payload') or {}
    if payload:
        clip = payload.get('currentClip') or {}
        clip_vpid = clip.get('vpid')
        clip_title = clip.get('title')
        if clip_vpid and clip_title:
            formats, subtitles = self._download_media_selector(clip_vpid)
            return {
                'id': clip_vpid,
                'title': clip_title,
                'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
                'description': clip.get('description'),
                'duration': parse_duration(clip.get('duration')),
                'formats': formats,
                'subtitles': subtitles,
            }
        bbc3_playlist = try_get(
            payload, lambda x: x['content']['bbcMedia']['playlist'],
            dict)
        if bbc3_playlist:
            playlist_title = bbc3_playlist.get('title') or playlist_title
            thumbnail = bbc3_playlist.get('holdingImageURL')
            entries = []
            # A playlist dict without 'items' yields an empty playlist
            # instead of raising KeyError
            for bbc3_item in bbc3_playlist.get('items') or []:
                programme_id = bbc3_item.get('versionID')
                if not programme_id:
                    continue
                formats, subtitles = self._download_media_selector(programme_id)
                entries.append({
                    'id': programme_id,
                    'title': playlist_title,
                    'thumbnail': thumbnail,
                    'timestamp': timestamp,
                    'formats': formats,
                    'subtitles': subtitles,
                })
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)

    # __INITIAL_DATA__ is either a plain JSON object or a JSON object wrapped
    # in a JSON string; the latter needs to be decoded twice
    initial_data = self._search_regex(
        r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
        'quoted preload state', default=None)
    if initial_data is None:
        initial_data = self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
            'preload state', default='{}')
    else:
        initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
    # The first (non-fatal) decode of the quoted form may have produced
    # None/empty; only attempt the second decode when there is something to parse
    if initial_data:
        initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
    if initial_data:
        def parse_media(media):
            # Append one entry to `entries` per playable item in a media model
            if not media:
                return
            for item in (try_get(media, lambda x: x['media']['items'], list) or []):
                item_id = item.get('id')
                item_title = item.get('title')
                if not (item_id and item_title):
                    continue
                formats, subtitles = self._download_media_selector(item_id)
                item_desc = None
                blocks = try_get(media, lambda x: x['summary']['blocks'], list)
                if blocks:
                    summary = []
                    for block in blocks:
                        text = try_get(block, lambda x: x['model']['text'], compat_str)
                        if text:
                            summary.append(text)
                    if summary:
                        item_desc = '\n\n'.join(summary)
                item_time = None
                for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
                    if try_get(meta, lambda x: x['label']) == 'Published':
                        item_time = unified_timestamp(meta.get('timestamp'))
                        break
                entries.append({
                    'id': item_id,
                    'title': item_title,
                    'thumbnail': item.get('holdingImageUrl'),
                    'formats': formats,
                    'subtitles': subtitles,
                    'timestamp': item_time,
                    'description': strip_or_none(item_desc),
                })
        for resp in (initial_data.get('data') or {}).values():
            name = resp.get('name')
            if name == 'media-experience':
                parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
            elif name == 'article':
                for block in (try_get(resp,
                                      (lambda x: x['data']['blocks'],
                                       lambda x: x['data']['content']['model']['blocks'],),
                                      list) or []):
                    if block.get('type') not in ['media', 'video']:
                        continue
                    parse_media(block.get('model'))
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)

    def extract_all(pattern):
        # Parse every regex match as JSON, dropping the ones that fail to parse
        return list(filter(None, map(
            lambda s: self._parse_json(s, playlist_id, fatal=False),
            re.findall(pattern, webpage))))

    # Multiple video article (e.g.
    # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
    EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
    entries = []
    for match in extract_all(r'new\s+SMP\(({.+?})\)'):
        embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
        if embed_url and re.match(EMBED_URL, embed_url):
            entries.append(embed_url)
    entries.extend(re.findall(
        r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
    if entries:
        return self.playlist_result(
            [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
            playlist_id, playlist_title, playlist_description)

    # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
    medias = extract_all(r"data-media-meta='({[^']+})'")

    if not medias:
        # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
        media_asset = self._search_regex(
            r'mediaAssetPage\.init\(\s*({.+?}), "/',
            webpage, 'media asset', default=None)
        if media_asset:
            # Non-fatal parse: treat unparsable data as "no videos" so the
            # vxp-playlist fallback below still gets a chance
            media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False) or {}
            medias = []
            for video in media_asset_page.get('videos', {}).values():
                medias.extend(video.values())

    if not medias:
        # Multiple video playlist with single `now playing` entry (e.g.
        # http://www.bbc.com/news/video_and_audio/must_see/33767813)
        vxp_playlist = self._parse_json(
            self._search_regex(
                r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
                webpage, 'playlist data'),
            playlist_id)
        playlist_medias = []
        for item in vxp_playlist:
            media = item.get('media')
            if not media:
                continue
            playlist_medias.append(media)
            # Download single video if found media with asset id matching the video id from URL
            if item.get('advert', {}).get('assetId') == playlist_id:
                medias = [media]
                break
        # Fallback to the whole playlist
        if not medias:
            medias = playlist_medias

    entries = []
    for num, media_meta in enumerate(medias, start=1):
        formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
        if not formats and not self.get_param('ignore_no_formats'):
            continue

        video_id = media_meta.get('externalId')
        if not video_id:
            video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)

        title = media_meta.get('caption')
        if not title:
            title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)

        duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))

        images = []
        for image in media_meta.get('images', {}).values():
            images.extend(image.values())
        if 'image' in media_meta:
            images.append(media_meta['image'])

        thumbnails = [{
            'url': image.get('href'),
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in images]

        entries.append({
            'id': video_id,
            'title': title,
            'thumbnails': thumbnails,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        })

    return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
a65402ef
YCH
1348
1349
class BBCCoUkArticleIE(InfoExtractor):
    # Extracts the clips embedded in a bbc.co.uk/programmes/articles/<id> page
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of the ``typeof="Clip"`` resources found on the article page."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # NOTE(review): the og:description lookup appears to be non-fatal and
        # may return None; strip_or_none avoids an AttributeError in that case
        description = strip_or_none(self._og_search_description(webpage))

        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
1378
1379
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    # Shared logic for paginated bbc.co.uk playlists. This class reads
    # self._VIDEO_ID_TEMPLATE and self._URL_TEMPLATE and calls
    # self._extract_title_and_description(), none of which it defines itself.

    def _entries(self, webpage, url, playlist_id):
        """Yield one url_result per video id found on the page(s).

        When the URL carries an explicit ``page`` query parameter only that
        page is processed; otherwise the "next" pagination link is followed
        until there is none left.
        """
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        explicit_page = 'page' in query
        for page_num in itertools.count(2):
            video_ids = re.findall(
                self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)
            for video_id in video_ids:
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
            if explicit_page:
                return
            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page:
                return
            next_url = compat_urlparse.urljoin(url, next_page)
            webpage = self._download_webpage(
                next_url, playlist_id,
                'Downloading page %d' % page_num, page_num)

    def _real_extract(self, url):
        """Download the first page and wrap the lazily generated entries in a playlist."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title, description = self._extract_title_and_description(webpage)
        entries = self._entries(webpage, url, playlist_id)
        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
1410
1411
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    # Shared logic for iPlayer playlists. This class reads self._PAGE_SIZE and
    # self._DESCRIPTION_KEY and calls _call_api, _get_elements, _get_episode,
    # _get_episode_image, _get_episode_field, _get_playlist_data and
    # _get_playlist_title, none of which it defines itself.
    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Return ``episode[key][default_key]`` via try_get."""
        return try_get(episode, lambda x: x[key][default_key])

    def _get_description(self, data):
        """Pick the first available synopsis, preferring large over medium over small."""
        return dict_get(
            data.get(self._DESCRIPTION_KEY) or {}, ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield url-type entries for one zero-based *page* of the API listing."""
        # The API is queried with page + 1
        api_data = self._call_api(programme_id, per_page, page + 1, series_id)
        for element in self._get_elements(api_data):
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                continue
            image = self._get_episode_image(episode)
            thumbnail = image.replace('{recipe}', 'raw') if image else None
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': thumbnail,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Build the playlist, honouring optional ``seriesId`` and ``page`` query parameters."""
        pid = self._match_id(url)
        qs = parse_qs(url)
        series_id = qs.get('seriesId', [None])[0]
        page = qs.get('page', [None])[0]
        if page:
            # Explicit page requested: fetch just that one, 36 items per page
            fetch_page = functools.partial(self._fetch_page, pid, 36, series_id)
            entries = fetch_page(int(page) - 1)
        else:
            fetch_page = functools.partial(
                self._fetch_page, pid, self._PAGE_SIZE, series_id)
            entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        return self.playlist_result(
            entries, pid, self._get_playlist_title(playlist_data),
            self._get_description(playlist_data))
1460
1461
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "episodes" listings, fetched from graph.ibl.api.bbc.co.uk
    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the episode's default image via _get_default."""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the ``default`` variant of *field* via _get_default."""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the result list stored under ``entities.results``."""
        entities = data['entities']
        return entities['results']

    @staticmethod
    def _get_episode(element):
        """Return the element's ``episode`` value, or an empty dict if it is falsy."""
        return element.get('episode') or {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST a JSON query to the graph API and return ``data.programme`` of the response."""
        variables = {
            'id': pid,
            'page': page,
            'perPage': per_page,
        }
        if series_id:
            variables['sliceId'] = series_id
        request_body = json.dumps({
            'id': '5692d93d5aac8d796a0305e895e61551',
            'variables': variables,
        }).encode('utf-8')
        response = self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid,
            headers={'Content-Type': 'application/json'},
            data=request_body)
        return response['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """Return *data* unchanged."""
        return data

    def _get_playlist_title(self, data):
        """Return the default title variant via _get_default."""
        return self._get_default(data, 'title')
1549
1550
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "group" listings, fetched from ibl.api.bbc.co.uk
    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the ``standard`` image of the episode via _get_default."""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Return ``episode.get(field)``."""
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        """Return the list stored under ``elements``."""
        return data['elements']

    @staticmethod
    def _get_episode(element):
        """Return *element* unchanged."""
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of the group's episodes; *series_id* is accepted but not used."""
        api_url = 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid
        paging = {
            'page': page,
            'per_page': per_page,
        }
        response = self._download_json(api_url, pid, query=paging)
        return response['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Return the ``group`` sub-dict of the API data."""
        return data['group']

    def _get_playlist_title(self, data):
        """Return ``data.get('title')``."""
        return data.get('title')
ded7511a
S
1613
1614
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    # Episodes / broadcasts / clips listings under bbc.co.uk/programmes/<id>
    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return ``(title, description)`` taken from the page's Open Graph tags."""
        og_title = self._og_search_title(webpage, fatal=False)
        og_description = self._og_search_description(webpage)
        return og_title, og_description