]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bbc.py
[panopto] Add extractors (#2908)
[yt-dlp.git] / yt_dlp / extractor / bbc.py
CommitLineData
9afa1770 1# coding: utf-8
082c6c86
S
2from __future__ import unicode_literals
3
1418a043 4import functools
254e64a2 5import itertools
1418a043 6import json
f0228f56 7import re
082c6c86 8
f13b1e7d 9from .common import InfoExtractor
3721515b 10from ..compat import (
11 compat_etree_Element,
12 compat_HTTPError,
1bdae7d3 13 compat_str,
50e93e03 14 compat_urllib_error,
3721515b 15 compat_urlparse,
16)
8683b4d8 17from ..utils import (
3721515b 18 ExtractorError,
1418a043 19 OnDemandPagedList,
97067db2 20 clean_html,
9fb64c04 21 dict_get,
9afa1770 22 float_or_none,
97067db2 23 get_element_by_class,
8683b4d8 24 int_or_none,
6d155707 25 js_to_json,
9afa1770
S
26 parse_duration,
27 parse_iso8601,
4dfbf869 28 parse_qs,
1bdae7d3 29 strip_or_none,
9fb64c04 30 try_get,
dab062fb 31 unescapeHTML,
1bdae7d3 32 unified_timestamp,
f0228f56 33 url_or_none,
97067db2
S
34 urlencode_postdata,
35 urljoin,
8683b4d8 36)
082c6c86 37
d12a1a47 38
f13b1e7d 39class BBCCoUkIE(InfoExtractor):
082c6c86 40 IE_NAME = 'bbc.co.uk'
2e3fd9ec 41 IE_DESC = 'BBC iPlayer'
50e93e03 42 _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
f20a11ed
S
43 _VALID_URL = r'''(?x)
44 https?://
45 (?:www\.)?bbc\.co\.uk/
46 (?:
47 programmes/(?!articles/)|
48 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
72d256c4 49 music/(?:clips|audiovideo/popular)[/#]|
d3d45e0a 50 radio/player/|
b72305f0 51 sounds/play/|
d3d45e0a 52 events/[^/]+/play/[^/]+/
f20a11ed 53 )
ded7511a 54 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
f20a11ed 55 ''' % _ID_REGEX
082c6c86 56
97067db2
S
57 _LOGIN_URL = 'https://account.bbc.com/signin'
58 _NETRC_MACHINE = 'bbc'
59
29f7c58a 60 _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
61 _MEDIA_SETS = [
26ccc68b
S
62 # Provides HQ HLS streams with even better quality than the pc mediaset but fails
63 # with geolocation in some cases even when it's not geo restricted at all (e.g.
d781e293 64 # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
29f7c58a 65 'iptv-all',
66 'pc',
d12a1a47 67 ]
a8b081a0 68
e6174ee9
S
69 _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
70
2e3fd9ec
S
71 _TESTS = [
72 {
f2d0fc68 73 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
2e3fd9ec 74 'info_dict': {
f2d0fc68 75 'id': 'b039d07m',
b1ea6802 76 'ext': 'flv',
acc86c9a 77 'title': 'Kaleidoscope, Leonard Cohen',
c4914185 78 'description': 'The Canadian poet and songwriter reflects on his musical career.',
2e3fd9ec
S
79 },
80 'params': {
b1ea6802 81 # rtmp download
2e3fd9ec
S
82 'skip_download': True,
83 }
082c6c86 84 },
2e3fd9ec
S
85 {
86 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
87 'info_dict': {
88 'id': 'b00yng1d',
89 'ext': 'flv',
90 'title': 'The Man in Black: Series 3: The Printed Name',
91 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
92 'duration': 1800,
93 },
94 'params': {
95 # rtmp download
96 'skip_download': True,
c7f0177f
S
97 },
98 'skip': 'Episode is no longer available on BBC iPlayer Radio',
2e3fd9ec
S
99 },
100 {
101 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
102 'info_dict': {
103 'id': 'b00yng1d',
104 'ext': 'flv',
17968e44 105 'title': 'The Voice UK: Series 3: Blind Auditions 5',
611c1dd9 106 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
17968e44 107 'duration': 5100,
2e3fd9ec
S
108 },
109 'params': {
110 # rtmp download
111 'skip_download': True,
112 },
b1ea6802 113 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
c056efa2
S
114 },
115 {
116 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
117 'info_dict': {
118 'id': 'b03k3pb7',
119 'ext': 'flv',
120 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
121 'description': '2. Invasion',
122 'duration': 3600,
123 },
124 'params': {
125 # rtmp download
126 'skip_download': True,
127 },
b1ea6802 128 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
ae6986fb
S
129 }, {
130 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
131 'info_dict': {
132 'id': 'b04v209v',
133 'ext': 'flv',
134 'title': 'Pete Tong, The Essential New Tune Special',
135 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
136 'duration': 10800,
137 },
138 'params': {
139 # rtmp download
140 'skip_download': True,
a3ef0e1c
YCH
141 },
142 'skip': 'Episode is no longer available on BBC iPlayer Radio',
c7e67594 143 }, {
5aa535c3 144 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
c7e67594
S
145 'note': 'Audio',
146 'info_dict': {
5aa535c3 147 'id': 'p022h44j',
b1ea6802 148 'ext': 'flv',
5aa535c3
S
149 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
150 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
151 'duration': 227,
c7e67594
S
152 },
153 'params': {
b1ea6802 154 # rtmp download
c7e67594
S
155 'skip_download': True,
156 }
157 }, {
158 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
159 'note': 'Video',
160 'info_dict': {
161 'id': 'p025c103',
b1ea6802 162 'ext': 'flv',
c7e67594
S
163 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
164 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
165 'duration': 226,
166 },
167 'params': {
b1ea6802 168 # rtmp download
c7e67594
S
169 'skip_download': True,
170 }
e68ae99a
S
171 }, {
172 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
173 'info_dict': {
174 'id': 'p02n76xf',
175 'ext': 'flv',
176 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
177 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
178 'duration': 3540,
179 },
180 'params': {
181 # rtmp download
182 'skip_download': True,
183 },
b1ea6802 184 'skip': 'geolocation',
25fa8d66
YCH
185 }, {
186 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
187 'info_dict': {
188 'id': 'b05zmgw1',
189 'ext': 'flv',
190 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
191 'title': 'Royal Academy Summer Exhibition',
192 'duration': 3540,
193 },
194 'params': {
195 # rtmp download
196 'skip_download': True,
197 },
b1ea6802 198 'skip': 'geolocation',
54914380
S
199 }, {
200 # iptv-all mediaset fails with geolocation however there is no geo restriction
201 # for this programme at all
5aa535c3 202 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
54914380 203 'info_dict': {
5aa535c3 204 'id': 'b06rkms3',
54914380 205 'ext': 'flv',
5aa535c3
S
206 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
207 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
54914380
S
208 },
209 'params': {
210 # rtmp download
211 'skip_download': True,
212 },
b1ea6802 213 'skip': 'Now it\'s really geo-restricted',
1ac6e794 214 }, {
067aa17e 215 # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
1ac6e794
S
216 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
217 'info_dict': {
218 'id': 'p028bfkj',
b1ea6802 219 'ext': 'flv',
1ac6e794
S
220 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
221 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
222 },
223 'params': {
b1ea6802 224 # rtmp download
1ac6e794
S
225 'skip_download': True,
226 },
b72305f0
J
227 }, {
228 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
229 'note': 'Audio',
230 'info_dict': {
231 'id': 'm0007jz9',
232 'ext': 'mp4',
233 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
234 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
235 'duration': 9840,
236 },
237 'params': {
238 # rtmp download
239 'skip_download': True,
240 }
31763975
S
241 }, {
242 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
243 'only_matching': True,
c7e67594
S
244 }, {
245 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
246 'only_matching': True,
0692ef86
S
247 }, {
248 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
249 'only_matching': True,
f20a11ed
S
250 }, {
251 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
252 'only_matching': True,
72d256c4
S
253 }, {
254 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
255 'only_matching': True,
53647dfd
S
256 }, {
257 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
258 'only_matching': True,
6f356cbb
S
259 }, {
260 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
261 'only_matching': True,
262 }, {
263 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
264 'only_matching': True,
72d256c4 265 }]
2e3fd9ec 266
97067db2
S
def _login(self):
    """Sign in to the BBC account service when credentials are available.

    Returns silently when no username is configured. Otherwise the signin
    page is fetched, its hidden inputs are submitted together with the
    credentials, and the final URL of the response is inspected.

    Raises:
        ExtractorError: if the response URL still contains the signin URL
            after the form has been posted.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    signin_url = self._LOGIN_URL
    signin_page = self._download_webpage(
        signin_url, None, 'Downloading signin page')

    # Start from the hidden inputs of the page and add the credentials.
    form_fields = self._hidden_inputs(signin_page)
    form_fields['username'] = username
    form_fields['password'] = password

    # The form action may be relative, hence the urljoin against the
    # signin URL (which is also the fallback when no action is found).
    form_action = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
        'post url', default=signin_url, group='url')
    submit_url = urljoin(signin_url, form_action)

    response, handle = self._download_webpage_handle(
        submit_url, None, 'Logging in',
        data=urlencode_postdata(form_fields),
        headers={'Referer': signin_url})

    if signin_url not in handle.geturl():
        return

    # Still on the signin page: report the form message if there is one.
    message = clean_html(get_element_by_class('form-message', response))
    if not message:
        raise ExtractorError('Unable to log in')
    raise ExtractorError('Unable to login: %s' % message, expected=True)
296
def _real_initialize(self):
    """Initialization hook of the extractor: performs the (optional) login."""
    self._login()
299
d12a1a47
S
class MediaSelectionError(Exception):
    """Error reported by a media selection document.

    The value passed to the constructor is stored on ``id`` and is also
    forwarded to ``Exception`` so that ``str()``, ``repr()`` and ``args``
    of the error are informative instead of empty.
    """

    def __init__(self, id):
        # Explicit base-class call keeps this working on Python 2 as well.
        Exception.__init__(self, id)
        self.id = id
303
2e3fd9ec
S
def _extract_asx_playlist(self, connection, programme_id):
    """Download the ASX playlist of *connection* and list its entry URLs.

    The playlist location is the ``href`` of *connection*; the result is
    the ``href`` attribute of every ``./Entry/ref`` element, in document
    order.
    """
    asx_url = connection.get('href')
    asx_doc = self._download_xml(asx_url, programme_id, 'Downloading ASX playlist')
    hrefs = []
    for entry_ref in asx_doc.findall('./Entry/ref'):
        hrefs.append(entry_ref.get('href'))
    return hrefs
307
2e3fd9ec 308 def _extract_items(self, playlist):
e6174ee9
S
309 return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
310
2e3fd9ec 311 def _extract_medias(self, media_selection):
29f7c58a 312 error = media_selection.get('result')
313 if error:
314 raise BBCCoUkIE.MediaSelectionError(error)
315 return media_selection.get('media') or []
2e3fd9ec
S
316
317 def _extract_connections(self, media):
29f7c58a 318 return media.get('connection') or []
2e3fd9ec 319
def _get_subtitles(self, media, programme_id):
    """Build the subtitles dict for a captions *media* entry.

    The connections of *media* are tried in order. The first one whose
    ``href`` is a valid URL and whose captions document downloads as an
    XML element is returned as a single TTML track under ``'en'``. An
    empty dict is returned when no connection qualifies.
    """
    for connection in self._extract_connections(media):
        captions_url = url_or_none(connection.get('href'))
        if not captions_url:
            continue
        # Non-fatal download: it is only used to check that the captions
        # are actually retrievable as XML.
        captions_doc = self._download_xml(
            captions_url, programme_id, 'Downloading captions', fatal=False)
        if isinstance(captions_doc, compat_etree_Element):
            return {
                'en': [
                    {
                        'url': connection.get('href'),
                        'ext': 'ttml',
                    },
                ],
            }
    return {}
082c6c86 338
d12a1a47
S
def _raise_extractor_error(self, media_selection_error):
    """Re-raise a media selection error as an expected ExtractorError.

    The message combines ``IE_NAME`` with the ``id`` of the given error.
    """
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    raise ExtractorError(message, expected=True)
343
def _download_media_selector(self, programme_id):
    """Query the media selector for *programme_id*, trying each media set.

    The media sets in ``_MEDIA_SETS`` are tried in order and the first
    successful result is returned. A media selection error whose id is
    ``notukerror``, ``geolocation`` or ``selectionunavailable`` moves on
    to the next media set; any other id is reported immediately. When all
    media sets fail, the last such error is reported.
    """
    retryable_ids = ('notukerror', 'geolocation', 'selectionunavailable')
    pending_error = None
    for media_set in self._MEDIA_SETS:
        selector_url = self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id)
        try:
            return self._download_media_selector_url(selector_url, programme_id)
        except BBCCoUkIE.MediaSelectionError as err:
            if err.id in retryable_ids:
                pending_error = err
            else:
                self._raise_extractor_error(err)
    self._raise_extractor_error(pending_error)
9afa1770
S
356
def _download_media_selector_url(self, url, programme_id=None):
    """Download the media selection JSON at *url* and process it.

    HTTP 403 and 404 are passed to the downloader as expected statuses.
    Returns whatever ``_process_media_selector`` returns for the document.
    """
    selection = self._download_json(
        url, programme_id, 'Downloading media selection JSON',
        expected_status=(403, 404))
    return self._process_media_selector(selection, programme_id)
082c6c86 362
def _process_media_selector(self, media_selection, programme_id):
    """Turn a media selection document into formats and subtitles.

    ``video`` and ``audio`` media entries contribute formats, one or more
    per connection depending on the connection type (ASX playlist, DASH,
    HLS, HDS, direct HTTP(S) link or RTMP). ``captions`` entries provide
    the subtitles. A connection whose ``href`` was already seen is skipped.

    Returns:
        A ``(formats, subtitles)`` tuple; ``subtitles`` is ``None`` when
        the document contains no captions entry.
    """
    formats = []
    subtitles = None
    seen_hrefs = []

    for media in self._extract_medias(media_selection):
        kind = media.get('kind')
        if kind == 'captions':
            subtitles = self.extract_subtitles(media, programme_id)
            continue
        if kind not in ('video', 'audio'):
            continue

        bitrate = int_or_none(media.get('bitrate'))
        encoding = media.get('encoding')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        file_size = int_or_none(media.get('media_file_size'))

        for connection in self._extract_connections(media):
            href = connection.get('href')
            if href in seen_hrefs:
                continue
            if href:
                seen_hrefs.append(href)

            conn_kind = connection.get('kind')
            protocol = connection.get('protocol')
            supplier = connection.get('supplier')
            transfer_format = connection.get('transferFormat')
            format_id = supplier or conn_kind or protocol

            if supplier == 'asx':
                # ASX playlist: one format per referenced entry
                asx_refs = self._extract_asx_playlist(connection, programme_id)
                for index, ref_url in enumerate(asx_refs):
                    formats.append({
                        'url': ref_url,
                        'format_id': 'ref%s_%s' % (index, format_id),
                    })
                continue

            if transfer_format == 'dash':
                formats.extend(self._extract_mpd_formats(
                    href, programme_id, mpd_id=format_id, fatal=False))
                continue

            if transfer_format == 'hls':
                # TODO: let expected_status be passed into _extract_xxx_formats() instead
                try:
                    hls_formats = self._extract_m3u8_formats(
                        href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False)
                except ExtractorError as err:
                    underlying = err.exc_info[1]
                    if (isinstance(underlying, compat_urllib_error.HTTPError)
                            and underlying.code in (403, 404)):
                        # A 403/404 playlist simply contributes no formats.
                        hls_formats = []
                    else:
                        raise
                formats.extend(hls_formats)
                continue

            if transfer_format == 'hds':
                formats.extend(self._extract_f4m_formats(
                    href, programme_id, f4m_id=format_id, fatal=False))
                continue

            # Remaining connections: direct link or RTMP stream
            if not supplier and bitrate:
                format_id += '-%d' % bitrate

            if kind == 'video':
                fmt = {
                    'format_id': format_id,
                    'filesize': file_size,
                    'width': width,
                    'height': height,
                    'tbr': bitrate,
                    'vcodec': encoding,
                }
            else:
                fmt = {
                    'format_id': format_id,
                    'filesize': file_size,
                    'abr': bitrate,
                    'acodec': encoding,
                    'vcodec': 'none',
                }

            if protocol in ('http', 'https'):
                # Direct link
                fmt['url'] = href
            elif protocol == 'rtmp':
                application = connection.get('application', 'ondemand')
                auth_string = connection.get('authString')
                fmt.update({
                    'url': '%s://%s/%s?%s' % (
                        protocol, connection.get('server'), application, auth_string),
                    'play_path': connection.get('identifier'),
                    'app': '%s?%s' % (application, auth_string),
                    'page_url': 'http://www.bbc.co.uk',
                    'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                    'rtmp_live': False,
                    'ext': 'flv',
                })
            else:
                # Unknown protocol: nothing usable for this connection
                continue
            formats.append(fmt)

    return formats, subtitles
2e3fd9ec 457
ae6986fb
S
def _download_playlist(self, playlist_id):
    """Extract programme data from the JSON playlist of *playlist_id*.

    Formats and subtitles of every ``programme``/``radioProgramme`` item
    of every available version are collected. Each format is annotated
    with the version types, and formats of audio-described versions get a
    lowered language preference. If the JSON playlist request fails with
    HTTP 404, the legacy XML playlist is used instead.

    Returns:
        A ``(programme_id, title, description, duration, formats,
        subtitles)`` tuple.

    Raises:
        ExtractorError: if the playlist contains no playable item, or on
            any download error other than HTTP 404.
    """
    try:
        playlist = self._download_json(
            'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
            playlist_id, 'Downloading playlist JSON')
        formats = []
        subtitles = {}
        # Set once a playable item was processed; without one the metadata
        # variables returned below would never have been assigned.
        found_item = False

        for version in playlist.get('allAvailableVersions', []):
            smp_config = version['smpConfig']
            title = smp_config['title']
            description = smp_config['summary']
            for item in smp_config['items']:
                kind = item['kind']
                if kind not in ('programme', 'radioProgramme'):
                    continue
                programme_id = item.get('vpid')
                duration = int_or_none(item.get('duration'))
                version_formats, version_subtitles = self._download_media_selector(programme_id)
                types = version['types']
                for f in version_formats:
                    f['format_note'] = ', '.join(types)
                    if any('AudioDescribed' in x for x in types):
                        f['language_preference'] = -10
                formats += version_formats
                for tag, subformats in (version_subtitles or {}).items():
                    subtitles.setdefault(tag, []).extend(subformats)
                found_item = True

        if not found_item:
            # Not an HTTP 404, so the handler below re-raises this as is.
            raise ExtractorError(
                'No playable programme found in playlist %s' % playlist_id,
                expected=True)

        return programme_id, title, description, duration, formats, subtitles
    except ExtractorError as ee:
        if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
            raise

    # fallback to legacy playlist
    return self._process_legacy_playlist(playlist_id)
493
def _process_legacy_playlist_url(self, url, display_id):
    """Download the legacy XML playlist at *url* and extract its data."""
    legacy_playlist = self._download_legacy_playlist_url(url, display_id)
    return self._extract_from_legacy_playlist(legacy_playlist, display_id)
497
def _process_legacy_playlist(self, playlist_id):
    """Process the legacy iPlayer playlist that belongs to *playlist_id*."""
    legacy_url = 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id
    return self._process_legacy_playlist_url(legacy_url, playlist_id)
501
def _download_legacy_playlist_url(self, url, playlist_id=None):
    """Download and return the legacy playlist XML document at *url*."""
    note = 'Downloading legacy playlist XML'
    return self._download_xml(url, playlist_id, note)
ae6986fb 505
def _extract_from_legacy_playlist(self, playlist, playlist_id):
    """Extract programme data from a legacy (EMP) XML playlist.

    Only ``programme`` and ``radioProgramme`` items are considered; when
    several are present, the data of the last one is returned. The
    programme id is taken from the item's own ``identifier``/``group``
    attributes first and from its ``mediator`` child otherwise. Without a
    programme id, the item itself is processed as a media selection and
    *playlist_id* is used as the id.

    Returns:
        A ``(programme_id, title, description, duration, formats,
        subtitles)`` tuple.

    Raises:
        ExtractorError: if the playlist has a ``noItems`` element or
            contains no playable item.
    """
    ns = self._EMP_PLAYLIST_NS

    no_items = playlist.find('./{%s}noItems' % ns)
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % playlist_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % playlist_id
        elif reason == 'noMedia':
            msg = 'Episode %s is not currently available' % playlist_id
        else:
            msg = 'Episode %s is not available: %s' % (playlist_id, reason)
        raise ExtractorError(msg, expected=True)

    def get_from_attributes(element):
        # First attribute that looks like a programme id wins.
        for attr in ('identifier', 'group'):
            value = element.get(attr)
            if value and re.match(r'^[pb][\da-z]{7}$', value):
                return value
        return None

    def get_programme_id(item):
        # Use the item's own attributes when they hold an id (this lookup
        # used to be performed but its result was thrown away).
        own_id = get_from_attributes(item)
        if own_id:
            return own_id
        mediator = item.find('./{%s}mediator' % ns)
        if mediator is not None:
            return get_from_attributes(mediator)
        return None

    extracted = None
    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind not in ('programme', 'radioProgramme'):
            continue
        title = playlist.find('./{%s}title' % ns).text
        description_el = playlist.find('./{%s}summary' % ns)
        description = description_el.text if description_el is not None else None

        programme_id = get_programme_id(item)
        duration = int_or_none(item.get('duration'))

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
        else:
            formats, subtitles = self._process_media_selector(item, playlist_id)
            programme_id = playlist_id

        extracted = (programme_id, title, description, duration, formats, subtitles)

    if extracted is None:
        # Without this guard the return would read unassigned variables.
        raise ExtractorError(
            'No playable items found in playlist %s' % playlist_id,
            expected=True)

    return extracted
549
c056efa2
S
def _real_extract(self, url):
    """Extract a single programme from a bbc.co.uk page.

    The page is searched for a player error message first. The programme
    id (vpid) is then taken from the ``mediator.bind(...)`` player config
    or from a ``"vpid"`` entry in the page. When neither yields an id,
    the playlist of the id matched from the URL is used instead.

    Raises:
        ExtractorError: if the page shows a player error message.
    """
    group_id = self._match_id(url)
    webpage = self._download_webpage(url, group_id, 'Downloading video page')

    page_error = self._search_regex(
        r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
        webpage, 'error', default=None)
    if error_is_present(page_error) if False else page_error:
        raise ExtractorError(page_error, expected=True)

    programme_id = None
    duration = None

    player_json = self._search_regex(
        r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
        webpage, 'player', default=None)
    if player_json:
        player = self._parse_json(player_json, group_id).get('player', {})
        duration = int_or_none(player.get('duration'))
        programme_id = player.get('vpid')

    if not programme_id:
        vpid_pattern = r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX
        programme_id = self._search_regex(
            vpid_pattern, webpage, 'vpid', fatal=False, default=None)

    if not programme_id:
        # No vpid on the page: fall back to the playlist of the page id.
        (programme_id, title, description,
         duration, formats, subtitles) = self._download_playlist(group_id)
    else:
        formats, subtitles = self._download_media_selector(programme_id)

        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
                 r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')

        description = self._search_regex(
            (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
             r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
            webpage, 'description', default=None)
        if not description:
            description = self._html_search_meta('description', webpage)

    self._sort_formats(formats)

    return {
        'id': programme_id,
        'title': title,
        'description': description,
        'thumbnail': self._og_search_thumbnail(webpage, default=None),
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }
10273d6e 602
603
9afa1770
S
604class BBCIE(BBCCoUkIE):
605 IE_NAME = 'bbc'
606 IE_DESC = 'BBC'
607 _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
10273d6e 608
29f7c58a 609 _MEDIA_SETS = [
29f7c58a 610 'pc',
2d997542 611 'mobile-tablet-main',
d12a1a47 612 ]
10273d6e 613
614 _TESTS = [{
6a747190 615 # article with multiple videos embedded with data-playable containing vpids
10273d6e 616 'url': 'http://www.bbc.com/news/world-europe-32668511',
617 'info_dict': {
618 'id': 'world-europe-32668511',
acc86c9a 619 'title': 'Russia stages massive WW2 parade',
9afa1770 620 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
10273d6e 621 },
622 'playlist_count': 2,
a3bfddfa 623 }, {
6a747190 624 # article with multiple videos embedded with data-playable (more videos)
10273d6e 625 'url': 'http://www.bbc.com/news/business-28299555',
626 'info_dict': {
627 'id': 'business-28299555',
628 'title': 'Farnborough Airshow: Video highlights',
9afa1770 629 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
10273d6e 630 },
631 'playlist_count': 9,
9afa1770 632 'skip': 'Save time',
88ed52ae
S
633 }, {
634 # article with multiple videos embedded with `new SMP()`
6a747190 635 # broken
88ed52ae
S
636 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
637 'info_dict': {
638 'id': '3662a707-0af9-3149-963f-47bea720b460',
b7d7674f 639 'title': 'BUGGER',
88ed52ae
S
640 },
641 'playlist_count': 18,
a3bfddfa 642 }, {
6a747190 643 # single video embedded with data-playable containing vpid
10273d6e 644 'url': 'http://www.bbc.com/news/world-europe-32041533',
10273d6e 645 'info_dict': {
646 'id': 'p02mprgb',
55ebae26 647 'ext': 'mp4',
10273d6e 648 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
55ebae26 649 'description': 'md5:2868290467291b37feda7863f7a83f54',
10273d6e 650 'duration': 47,
9afa1770 651 'timestamp': 1427219242,
da92eeae 652 'upload_date': '20150324',
10273d6e 653 },
654 'params': {
9afa1770 655 # rtmp download
10273d6e 656 'skip_download': True,
657 }
a3bfddfa 658 }, {
6a747190
S
659 # article with single video embedded with data-playable containing XML playlist
660 # with direct video links as progressiveDownloadUrl (for now these are extracted)
661 # and playlist with f4m and m3u8 as streamingUrl
de939d89 662 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
de939d89 663 'info_dict': {
9afa1770 664 'id': '150615_telabyad_kentin_cogu',
de939d89 665 'ext': 'mp4',
ad152e2d 666 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
05087d1b 667 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
9afa1770 668 'timestamp': 1434397334,
da92eeae 669 'upload_date': '20150615',
de939d89 670 },
671 'params': {
672 'skip_download': True,
673 }
c936d8cc 674 }, {
6a747190 675 # single video embedded with data-playable containing XML playlists (regional section)
de939d89 676 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 677 'info_dict': {
9afa1770 678 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 679 'ext': 'mp4',
9afa1770 680 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
05087d1b 681 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
9afa1770 682 'timestamp': 1434713142,
da92eeae 683 'upload_date': '20150619',
de939d89 684 },
685 'params': {
686 'skip_download': True,
687 }
a346b1ff
S
688 }, {
689 # single video from video playlist embedded with vxp-playlist-data JSON
690 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
691 'info_dict': {
692 'id': 'p02w6qjc',
55ebae26 693 'ext': 'mp4',
a346b1ff
S
694 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
695 'duration': 56,
0bc4ee60 696 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
a346b1ff
S
697 },
698 'params': {
699 'skip_download': True,
700 }
9afa1770
S
701 }, {
702 # single video story with digitalData
703 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
704 'info_dict': {
705 'id': 'p02q6gc4',
706 'ext': 'flv',
707 'title': 'Sri Lanka’s spicy secret',
708 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
709 'timestamp': 1437674293,
710 'upload_date': '20150723',
711 },
712 'params': {
713 # rtmp download
714 'skip_download': True,
715 }
716 }, {
717 # single video story without digitalData
718 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
719 'info_dict': {
720 'id': 'p018zqqg',
55ebae26 721 'ext': 'mp4',
9afa1770
S
722 'title': 'Hyundai Santa Fe Sport: Rock star',
723 'description': 'md5:b042a26142c4154a6e472933cf20793d',
ae8bdfd1
S
724 'timestamp': 1415867444,
725 'upload_date': '20141113',
9afa1770
S
726 },
727 'params': {
728 # rtmp download
729 'skip_download': True,
730 }
9fb64c04
S
731 }, {
732 # single video embedded with Morph
733 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
734 'info_dict': {
735 'id': 'p041vhd0',
736 'ext': 'mp4',
737 'title': "Nigeria v Japan - Men's First Round",
738 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
739 'duration': 7980,
740 'uploader': 'BBC Sport',
741 'uploader_id': 'bbc_sport',
742 },
743 'params': {
744 # m3u8 download
745 'skip_download': True,
9fb64c04
S
746 },
747 'skip': 'Georestricted to UK',
9afa1770 748 }, {
6a747190 749 # single video with playlist.sxml URL in playlist param
9afa1770
S
750 'url': 'http://www.bbc.com/sport/0/football/33653409',
751 'info_dict': {
752 'id': 'p02xycnp',
55ebae26 753 'ext': 'mp4',
9afa1770 754 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
7033bc1a 755 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
9afa1770
S
756 'duration': 140,
757 },
758 'params': {
759 # rtmp download
760 'skip_download': True,
761 }
b5d48cb1 762 }, {
6a747190 763 # article with multiple videos embedded with playlist.sxml in playlist param
b5d48cb1
S
764 'url': 'http://www.bbc.com/sport/0/football/34475836',
765 'info_dict': {
766 'id': '34475836',
450b233c 767 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
8c65e4a5 768 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
b5d48cb1
S
769 },
770 'playlist_count': 3,
450b233c
S
771 }, {
772 # school report article with single video
773 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
774 'info_dict': {
775 'id': '35744779',
776 'title': 'School which breaks down barriers in Jerusalem',
777 },
778 'playlist_count': 1,
9afa1770
S
779 }, {
780 # single video with playlist URL from weather section
781 'url': 'http://www.bbc.com/weather/features/33601775',
782 'only_matching': True,
783 }, {
784 # custom redirection to www.bbc.com
1bdae7d3 785 # also, video with window.__INITIAL_DATA__
9afa1770 786 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
1bdae7d3 787 'info_dict': {
788 'id': 'p02xzws1',
789 'ext': 'mp4',
790 'title': "Pluto may have 'nitrogen glaciers'",
791 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
792 'thumbnail': r're:https?://.+/.+\.jpg',
793 'timestamp': 1437785037,
794 'upload_date': '20150725',
795 },
50e93e03 796 }, {
797 # video with window.__INITIAL_DATA__ and value as JSON string
798 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
799 'info_dict': {
800 'id': 'p0b71qth',
801 'ext': 'mp4',
802 'title': 'Why France is making this woman a national hero',
803 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
804 'thumbnail': r're:https?://.+/.+\.jpg',
805 'timestamp': 1638230731,
806 'upload_date': '20211130',
807 },
a1cf3e38
S
808 }, {
809 # single video article embedded with data-media-vpid
810 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
811 'only_matching': True,
6d155707 812 }, {
50e93e03 813 # bbcthreeConfig
6d155707
S
814 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
815 'info_dict': {
816 'id': 'p06556y7',
817 'ext': 'mp4',
50e93e03 818 'title': 'Things Not To Say to people that live on council estates',
819 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
820 'duration': 360,
821 'thumbnail': r're:https?://.+/.+\.jpg',
6d155707 822 },
b96b4be4
RA
823 }, {
824 # window.__PRELOADED_STATE__
825 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
826 'info_dict': {
827 'id': 'b0b9z4vz',
828 'ext': 'mp4',
829 'title': 'Prom 6: An American in Paris and Turangalila',
830 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
831 'uploader': 'Radio 3',
832 'uploader_id': 'bbc_radio_three',
833 },
373941c5
S
834 }, {
835 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
836 'info_dict': {
837 'id': 'p06w9tws',
838 'ext': 'mp4',
839 'title': 'md5:2fabf12a726603193a2879a055f72514',
840 'description': 'Learn English words and phrases from this story',
841 },
842 'add_ie': [BBCCoUkIE.ie_key()],
3721515b 843 }, {
844 # BBC Reel
845 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
846 'info_dict': {
847 'id': 'p07c6sb9',
848 'ext': 'mp4',
849 'title': 'How positive thinking is harming your happiness',
850 'alt_title': 'The downsides of positive thinking',
851 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
852 'duration': 235,
853 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
854 'upload_date': '20190604',
855 'categories': ['Psychology'],
856 },
10273d6e 857 }]
858
9afa1770
S
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL that any of the more specific BBC extractors listed below accepts
    is rejected here; otherwise the decision is delegated to the parent
    class implementation.
    """
    specialised_ies = (
        BBCCoUkIE,
        BBCCoUkArticleIE,
        BBCCoUkIPlayerEpisodesIE,
        BBCCoUkIPlayerGroupIE,
        BBCCoUkPlaylistIE,
    )
    for specialised_ie in specialised_ies:
        if specialised_ie.suitable(url):
            return False
    return super(BBCIE, cls).suitable(url)
9afa1770
S
864
def _extract_from_media_meta(self, media_meta, video_id):
    """Turn one media metadata dict into a ``(formats, subtitles)`` pair.

    Three sources are tried in order: direct ``sourceFiles`` links, the
    media selector keyed by ``externalId``, and finally a legacy playlist
    referenced by ``href``. ``([], [])`` is returned when none is present.
    """
    # Direct links to media in media metadata (e.g.
    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
    # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
    source_files = media_meta.get('sourceFiles')
    if source_files:
        direct_formats = []
        for format_id, f in source_files.items():
            # Entries without a URL cannot be downloaded, so drop them
            if not f.get('url'):
                continue
            direct_formats.append({
                'url': f['url'],
                'format_id': format_id,
                'ext': f.get('encoding'),
                'tbr': float_or_none(f.get('bitrate'), 1000),
                'filesize': int_or_none(f.get('filesize')),
            })
        return direct_formats, []

    programme_id = media_meta.get('externalId')
    if programme_id:
        return self._download_media_selector(programme_id)

    # Process playlist.sxml as legacy playlist
    href = media_meta.get('href')
    if not href:
        return [], []

    playlist = self._download_legacy_playlist_url(href)
    # Only the last two members of the six-item result are needed here
    _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
    return formats, subtitles
891
baf39a1a
S
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
    """Build an info dict from a legacy ``playlist.sxml`` URL.

    The legacy playlist supplies the id, title, description, duration,
    formats and subtitles; *timestamp* is passed through unchanged.
    """
    legacy_result = self._process_legacy_playlist_url(url, playlist_id)
    programme_id, title, description, duration, formats, subtitles = legacy_result
    self._sort_formats(formats)
    info = {
        'id': programme_id,
        'title': title,
        'description': description,
        'duration': duration,
        'timestamp': timestamp,
    }
    info['formats'] = formats
    info['subtitles'] = subtitles
    return info
905
def _real_extract(self, url):
    """Extract the video(s) embedded in a generic BBC page.

    The page is downloaded once and then probed with a sequence of embed
    detection strategies; the first one that yields media wins:

    1. ``playlist.sxml`` params and ``data-playable`` attributes (playlist)
    2. a ``data-pid`` video group, delegated to BBCCoUkIE
    3. a single vpid (``data-*-vpid``, ``externalIdentifier``, ``videoId``)
    4. BBC Reel ``initial-data`` JSON
    5. ``Morph.setPayload`` lead media
    6. ``window.__PRELOADED_STATE__``
    7. ``bbcthreeConfig``
    8. ``window.__INITIAL_DATA__`` (plain or quoted JSON)
    9. ``new SMP(...)`` / ``setPlaylist(...)`` embed URLs
    10. ``data-media-meta``, ``mediaAssetPage.init`` or ``vxp-playlist-data``

    Returns either a single video info dict or a playlist result. The last
    strategy's ``vxp-playlist-data`` lookup is fatal, so a page matching
    none of the above raises from ``_search_regex``.
    """
    playlist_id = self._match_id(url)

    webpage = self._download_webpage(url, playlist_id)

    json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
    timestamp = json_ld_info.get('timestamp')

    playlist_title = json_ld_info.get('title')
    if not playlist_title:
        playlist_title = self._og_search_title(
            webpage, default=None) or self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
        if playlist_title:
            # Drop the trailing " - BBC ..." site suffix
            playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()

    playlist_description = json_ld_info.get(
        'description') or self._og_search_description(webpage, default=None)

    if not timestamp:
        timestamp = parse_iso8601(self._search_regex(
            [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
             r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
             r'"datePublished":\s*"([^"]+)'],
            webpage, 'date', default=None))

    entries = []

    # article with multiple videos embedded with playlist.sxml (e.g.
    # http://www.bbc.com/sport/0/football/34475836)
    playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
    playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
    if playlists:
        entries = [
            self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
            for playlist_url in playlists]

    # news article with multiple videos embedded with data-playable
    data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
    if data_playables:
        for _, data_playable_json in data_playables:
            data_playable = self._parse_json(
                unescapeHTML(data_playable_json), playlist_id, fatal=False)
            if not data_playable:
                continue
            settings = data_playable.get('settings', {})
            if settings:
                # data-playable with video vpid in settings.playlistObject.items (e.g.
                # http://www.bbc.com/news/world-us-canada-34473351)
                playlist_object = settings.get('playlistObject', {})
                if playlist_object:
                    items = playlist_object.get('items')
                    if items and isinstance(items, list):
                        title = playlist_object['title']
                        description = playlist_object.get('summary')
                        duration = int_or_none(items[0].get('duration'))
                        programme_id = items[0].get('vpid')
                        formats, subtitles = self._download_media_selector(programme_id)
                        self._sort_formats(formats)
                        entries.append({
                            'id': programme_id,
                            'title': title,
                            'description': description,
                            'timestamp': timestamp,
                            'duration': duration,
                            'formats': formats,
                            'subtitles': subtitles,
                        })
                else:
                    # data-playable without vpid but with a playlist.sxml URLs
                    # in otherSettings.playlist (e.g.
                    # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
                    playlist = data_playable.get('otherSettings', {}).get('playlist', {})
                    if playlist:
                        entry = None
                        for key in ('streaming', 'progressiveDownload'):
                            playlist_url = playlist.get('%sUrl' % key)
                            if not playlist_url:
                                continue
                            try:
                                info = self._extract_from_playlist_sxml(
                                    playlist_url, playlist_id, timestamp)
                                if not entry:
                                    entry = info
                                else:
                                    # Merge the second playlist into the first entry
                                    entry['title'] = info['title']
                                    entry['formats'].extend(info['formats'])
                            except ExtractorError as e:
                                # Some playlist URL may fail with 500, at the same time
                                # the other one may work fine (e.g.
                                # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
                                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
                                    continue
                                raise
                        if entry:
                            self._sort_formats(entry['formats'])
                            entries.append(entry)

    if entries:
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
    group_id = self._search_regex(
        r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
        webpage, 'group id', default=None)
    if group_id:
        return self.url_result(
            'https://www.bbc.co.uk/programmes/%s' % group_id,
            ie=BBCCoUkIE.ie_key())

    # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
    programme_id = self._search_regex(
        [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
         r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
         r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
        webpage, 'vpid', default=None)

    if programme_id:
        formats, subtitles = self._download_media_selector(programme_id)
        self._sort_formats(formats)
        # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
        # A non-fatal parse returns None on malformed JSON, hence the `or {}`
        digital_data = self._parse_json(
            self._search_regex(
                r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
            programme_id, fatal=False) or {}
        page_info = digital_data.get('page', {}).get('pageInfo', {})
        title = page_info.get('pageName') or self._og_search_title(webpage)
        description = page_info.get('description') or self._og_search_description(webpage)
        timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        }

    # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
    initial_data = self._parse_json(self._html_search_regex(
        r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
        webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
    if initial_data:
        init_data = try_get(
            initial_data, lambda x: x['initData']['items'][0], dict) or {}
        smp_data = init_data.get('smpData') or {}
        clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
        version_id = clip_data.get('versionID')
        if version_id:
            title = smp_data['title']
            formats, subtitles = self._download_media_selector(version_id)
            self._sort_formats(formats)
            image_url = smp_data.get('holdingImageURL')
            display_date = init_data.get('displayDate')
            topic_title = init_data.get('topicTitle')

            return {
                'id': version_id,
                'title': title,
                'formats': formats,
                'alt_title': init_data.get('shortTitle'),
                'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
                'description': smp_data.get('summary') or init_data.get('shortSummary'),
                'upload_date': display_date.replace('-', '') if display_date else None,
                'subtitles': subtitles,
                'duration': int_or_none(clip_data.get('duration')),
                'categories': [topic_title] if topic_title else None,
            }

    # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
    # There are several setPayload calls may be present but the video
    # seems to be always related to the first one
    morph_payload = self._parse_json(
        self._search_regex(
            r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
            webpage, 'morph payload', default='{}'),
        playlist_id, fatal=False)
    if morph_payload:
        components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
        for component in components:
            if not isinstance(component, dict):
                continue
            lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
            if not lead_media:
                continue
            identifiers = lead_media.get('identifiers')
            if not identifiers or not isinstance(identifiers, dict):
                continue
            programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
            if not programme_id:
                continue
            title = lead_media.get('title') or self._og_search_title(webpage)
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            description = lead_media.get('summary')
            uploader = lead_media.get('masterBrand')
            uploader_id = lead_media.get('mid')
            duration = None
            duration_d = lead_media.get('duration')
            if isinstance(duration_d, dict):
                duration = parse_duration(dict_get(
                    duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'duration': duration,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'formats': formats,
                'subtitles': subtitles,
            }

    preload_state = self._parse_json(self._search_regex(
        r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
        'preload state', default='{}'), playlist_id, fatal=False)
    if preload_state:
        current_programme = preload_state.get('programmes', {}).get('current') or {}
        programme_id = current_programme.get('id')
        if current_programme and programme_id and current_programme.get('type') == 'playable_item':
            title = current_programme.get('titles', {}).get('tertiary') or playlist_title
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            synopses = current_programme.get('synopses') or {}
            network = current_programme.get('network') or {}
            duration = int_or_none(
                current_programme.get('duration', {}).get('value'))
            thumbnail = None
            image_url = current_programme.get('image_url')
            if image_url:
                thumbnail = image_url.replace('{recipe}', 'raw')
            return {
                'id': programme_id,
                'title': title,
                'description': dict_get(synopses, ('long', 'medium', 'short')),
                'thumbnail': thumbnail,
                'duration': duration,
                'uploader': network.get('short_title'),
                'uploader_id': network.get('id'),
                'formats': formats,
                'subtitles': subtitles,
            }

    bbc3_config = self._parse_json(
        self._search_regex(
            r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
            'bbcthree config', default='{}'),
        playlist_id, transform_source=js_to_json, fatal=False) or {}
    payload = bbc3_config.get('payload') or {}
    if payload:
        clip = payload.get('currentClip') or {}
        clip_vpid = clip.get('vpid')
        clip_title = clip.get('title')
        if clip_vpid and clip_title:
            formats, subtitles = self._download_media_selector(clip_vpid)
            self._sort_formats(formats)
            return {
                'id': clip_vpid,
                'title': clip_title,
                'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
                'description': clip.get('description'),
                'duration': parse_duration(clip.get('duration')),
                'formats': formats,
                'subtitles': subtitles,
            }
        bbc3_playlist = try_get(
            payload, lambda x: x['content']['bbcMedia']['playlist'],
            dict)
        if bbc3_playlist:
            playlist_title = bbc3_playlist.get('title') or playlist_title
            thumbnail = bbc3_playlist.get('holdingImageURL')
            entries = []
            for bbc3_item in bbc3_playlist['items']:
                programme_id = bbc3_item.get('versionID')
                if not programme_id:
                    continue
                formats, subtitles = self._download_media_selector(programme_id)
                self._sort_formats(formats)
                entries.append({
                    'id': programme_id,
                    'title': playlist_title,
                    'thumbnail': thumbnail,
                    'timestamp': timestamp,
                    'formats': formats,
                    'subtitles': subtitles,
                })
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)

    # window.__INITIAL_DATA__ is either a JSON object or a JS string literal
    # that itself contains JSON (the "quoted" variant, tried first)
    initial_data = self._search_regex(
        r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
        'quoted preload state', default=None)
    if initial_data is None:
        # The default must be a JSON *string*: json.loads() raises TypeError
        # (which _parse_json does not handle) when handed a dict
        initial_data = self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
            'preload state', default='{}')
    else:
        # First pass unwraps the string literal; may yield None on bad input
        initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
    if initial_data:
        initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
    if initial_data:
        def parse_media(media):
            # Appends one entry per playable item of `media` to `entries`
            if not media:
                return
            for item in (try_get(media, lambda x: x['media']['items'], list) or []):
                item_id = item.get('id')
                item_title = item.get('title')
                if not (item_id and item_title):
                    continue
                formats, subtitles = self._download_media_selector(item_id)
                self._sort_formats(formats)
                item_desc = None
                blocks = try_get(media, lambda x: x['summary']['blocks'], list)
                if blocks:
                    summary = []
                    for block in blocks:
                        text = try_get(block, lambda x: x['model']['text'], compat_str)
                        if text:
                            summary.append(text)
                    if summary:
                        item_desc = '\n\n'.join(summary)
                item_time = None
                for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
                    if try_get(meta, lambda x: x['label']) == 'Published':
                        item_time = unified_timestamp(meta.get('timestamp'))
                        break
                entries.append({
                    'id': item_id,
                    'title': item_title,
                    'thumbnail': item.get('holdingImageUrl'),
                    'formats': formats,
                    'subtitles': subtitles,
                    'timestamp': item_time,
                    'description': strip_or_none(item_desc),
                })
        for resp in (initial_data.get('data') or {}).values():
            name = resp.get('name')
            if name == 'media-experience':
                parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
            elif name == 'article':
                for block in (try_get(resp,
                                      (lambda x: x['data']['blocks'],
                                       lambda x: x['data']['content']['model']['blocks'],),
                                      list) or []):
                    if block.get('type') != 'media':
                        continue
                    parse_media(block.get('model'))
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)

    def extract_all(pattern):
        # Parse every match of `pattern` as JSON, dropping unparsable/empty ones
        return list(filter(None, map(
            lambda s: self._parse_json(s, playlist_id, fatal=False),
            re.findall(pattern, webpage))))

    # Multiple video article (e.g.
    # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
    EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
    entries = []
    for match in extract_all(r'new\s+SMP\(({.+?})\)'):
        embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
        if embed_url and re.match(EMBED_URL, embed_url):
            entries.append(embed_url)
    entries.extend(re.findall(
        r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
    if entries:
        return self.playlist_result(
            [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
            playlist_id, playlist_title, playlist_description)

    # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
    medias = extract_all(r"data-media-meta='({[^']+})'")

    if not medias:
        # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
        media_asset = self._search_regex(
            r'mediaAssetPage\.init\(\s*({.+?}), "/',
            webpage, 'media asset', default=None)
        if media_asset:
            # Non-fatal parse may return None; fall back to an empty mapping
            media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False) or {}
            medias = []
            for video in media_asset_page.get('videos', {}).values():
                medias.extend(video.values())

    if not medias:
        # Multiple video playlist with single `now playing` entry (e.g.
        # http://www.bbc.com/news/video_and_audio/must_see/33767813)
        vxp_playlist = self._parse_json(
            self._search_regex(
                r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
                webpage, 'playlist data'),
            playlist_id)
        playlist_medias = []
        for item in vxp_playlist:
            media = item.get('media')
            if not media:
                continue
            playlist_medias.append(media)
            # Download single video if found media with asset id matching the video id from URL
            if item.get('advert', {}).get('assetId') == playlist_id:
                medias = [media]
                break
        # Fallback to the whole playlist
        if not medias:
            medias = playlist_medias

    entries = []
    for num, media_meta in enumerate(medias, start=1):
        formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
        if not formats and not self.get_param('ignore_no_formats'):
            continue
        self._sort_formats(formats)

        video_id = media_meta.get('externalId')
        if not video_id:
            video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)

        title = media_meta.get('caption')
        if not title:
            title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)

        duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))

        images = []
        for image in media_meta.get('images', {}).values():
            images.extend(image.values())
        if 'image' in media_meta:
            images.append(media_meta['image'])

        thumbnails = [{
            'url': image.get('href'),
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in images]

        entries.append({
            'id': video_id,
            'title': title,
            'thumbnails': thumbnails,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        })

    return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
a65402ef
YCH
1351
1352
class BBCCoUkArticleIE(InfoExtractor):
    """Extractor for bbc.co.uk programme articles that embed several clips."""

    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of the ``typeof="Clip"`` resources on the article page."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # The description is optional metadata: a page without og:description
        # must not abort extraction, so strip only when a value is present
        description = strip_or_none(self._og_search_description(webpage))

        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
1381
1382
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    """Shared pagination logic for bbc.co.uk playlist pages.

    Subclasses supply ``_VIDEO_ID_TEMPLATE``, ``_URL_TEMPLATE`` and
    ``_extract_title_and_description``.
    """

    def _entries(self, webpage, url, playlist_id):
        """Yield a url result for every video id found, following pagination.

        When *url* carries an explicit ``page`` query parameter only that
        page is processed; otherwise "next" links are followed until none
        is left.
        """
        query_string = compat_urlparse.urlparse(url).query
        single_page = 'page' in compat_urlparse.parse_qs(query_string)
        video_id_re = self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX

        # The page passed in is page 1, so the first download is page 2
        page_num = 2
        while True:
            for video_id in re.findall(video_id_re, webpage):
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
            if single_page:
                return
            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page:
                return
            next_page_url = compat_urlparse.urljoin(url, next_page)
            webpage = self._download_webpage(
                next_page_url, playlist_id,
                'Downloading page %d' % page_num, page_num)
            page_num += 1

    def _real_extract(self, url):
        """Download the playlist page and wrap its entries in a playlist result."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        title, description = self._extract_title_and_description(webpage)
        entries = self._entries(webpage, url, playlist_id)

        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
1413
1414
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    """Shared paging logic for iPlayer playlist extractors.

    Subclasses supply ``_PAGE_SIZE``, ``_DESCRIPTION_KEY`` and the
    ``_call_api`` / ``_get_*`` hooks used below.
    """

    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Return ``episode[key][default_key]`` via ``try_get``."""
        return try_get(episode, lambda x: x[key][default_key])

    def _get_description(self, data):
        """Pick the largest available synopsis stored under ``_DESCRIPTION_KEY``."""
        return dict_get(
            data.get(self._DESCRIPTION_KEY) or {},
            ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield url-type entries for the zero-based *page* of the playlist."""
        # The API call is made with a one-based page number
        api_data = self._call_api(programme_id, per_page, page + 1, series_id)
        for element in self._get_elements(api_data):
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                continue
            image = self._get_episode_image(episode)
            thumbnail = image.replace('{recipe}', 'raw') if image else None
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': thumbnail,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the playlist for *url*, honouring ``page`` and ``seriesId``."""
        pid = self._match_id(url)
        qs = parse_qs(url)
        series_id = qs.get('seriesId', [None])[0]
        page = qs.get('page', [None])[0]

        if page:
            # Explicit page: fetch just that page with a fixed size of 36
            fetch_page = functools.partial(self._fetch_page, pid, 36, series_id)
            entries = fetch_page(int(page) - 1)
        else:
            fetch_page = functools.partial(
                self._fetch_page, pid, self._PAGE_SIZE, series_id)
            entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)

        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        playlist_title = self._get_playlist_title(playlist_data)
        playlist_description = self._get_description(playlist_data)
        return self.playlist_result(
            entries, pid, playlist_title, playlist_description)
1463
1464
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    """iPlayer ``episodes`` pages, backed by the graph.ibl.api.bbc.co.uk endpoint."""

    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the episode's default image via ``_get_default``."""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the default variant of *field* via ``_get_default``."""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the result list of a programme API response."""
        entities = data['entities']
        return entities['results']

    @staticmethod
    def _get_episode(element):
        """Return the episode dict nested in *element* (empty dict if absent)."""
        return element.get('episode') or {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST the query to the graph API and return its ``programme`` payload."""
        variables = {
            'id': pid,
            'page': page,
            'perPage': per_page,
        }
        if series_id:
            # The series id is sent to the API under the name 'sliceId'
            variables['sliceId'] = series_id
        request_body = json.dumps({
            'id': '5692d93d5aac8d796a0305e895e61551',
            'variables': variables,
        }).encode('utf-8')
        response = self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid,
            headers={'Content-Type': 'application/json'},
            data=request_body)
        return response['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """The programme payload already is the playlist data."""
        return data

    def _get_playlist_title(self, data):
        """Return the default title variant of the programme."""
        return self._get_default(data, 'title')
1553
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    """iPlayer ``group`` pages, backed by the ibl.api.bbc.co.uk groups endpoint."""

    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the episode's ``images.standard`` value via ``_get_default``."""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Return *field* directly from the episode dict."""
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        """Return the element list of a group API response."""
        return data['elements']

    @staticmethod
    def _get_episode(element):
        """Group elements are used as the episode dicts as-is."""
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of group episodes; *series_id* is not used here."""
        api_url = 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid
        paging = {
            'page': page,
            'per_page': per_page,
        }
        response = self._download_json(api_url, pid, query=paging)
        return response['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Return the ``group`` member of the API response."""
        return data['group']

    def _get_playlist_title(self, data):
        """Return the group's title."""
        return data.get('title')
ded7511a
S
1616
1617
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    """Playlist extractor for programme ``episodes``/``broadcasts``/``clips`` pages."""

    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return the page's Open Graph ``(title, description)``; the title lookup is non-fatal."""
        og_title = self._og_search_title(webpage, fatal=False)
        og_description = self._og_search_description(webpage)
        return og_title, og_description