]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bbc.py
Reject entire playlists faster with `--match-filter`
[yt-dlp.git] / yt_dlp / extractor / bbc.py
CommitLineData
1418a043 1import functools
254e64a2 2import itertools
1418a043 3import json
f0228f56 4import re
ac668111 5import urllib.error
6import xml.etree.ElementTree
082c6c86 7
f13b1e7d 8from .common import InfoExtractor
ac668111 9from ..compat import compat_HTTPError, compat_str, compat_urlparse
8683b4d8 10from ..utils import (
3721515b 11 ExtractorError,
1418a043 12 OnDemandPagedList,
97067db2 13 clean_html,
9fb64c04 14 dict_get,
9afa1770 15 float_or_none,
97067db2 16 get_element_by_class,
8683b4d8 17 int_or_none,
6d155707 18 js_to_json,
9afa1770
S
19 parse_duration,
20 parse_iso8601,
4dfbf869 21 parse_qs,
1bdae7d3 22 strip_or_none,
9fb64c04 23 try_get,
dab062fb 24 unescapeHTML,
1bdae7d3 25 unified_timestamp,
f0228f56 26 url_or_none,
97067db2
S
27 urlencode_postdata,
28 urljoin,
8683b4d8 29)
082c6c86 30
d12a1a47 31
f13b1e7d 32class BBCCoUkIE(InfoExtractor):
082c6c86 33 IE_NAME = 'bbc.co.uk'
2e3fd9ec 34 IE_DESC = 'BBC iPlayer'
50e93e03 35 _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
f20a11ed
S
36 _VALID_URL = r'''(?x)
37 https?://
38 (?:www\.)?bbc\.co\.uk/
39 (?:
40 programmes/(?!articles/)|
41 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
72d256c4 42 music/(?:clips|audiovideo/popular)[/#]|
d3d45e0a 43 radio/player/|
b72305f0 44 sounds/play/|
d3d45e0a 45 events/[^/]+/play/[^/]+/
f20a11ed 46 )
ded7511a 47 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
f20a11ed 48 ''' % _ID_REGEX
082c6c86 49
97067db2
S
50 _LOGIN_URL = 'https://account.bbc.com/signin'
51 _NETRC_MACHINE = 'bbc'
52
29f7c58a 53 _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
54 _MEDIA_SETS = [
26ccc68b
S
55 # Provides HQ HLS streams with even better quality than the pc mediaset, but fails
56 # with geolocation in some cases even when it's not geo restricted at all (e.g.
d781e293 57 # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
29f7c58a 58 'iptv-all',
59 'pc',
d12a1a47 60 ]
a8b081a0 61
e6174ee9
S
62 _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
63
2e3fd9ec
S
64 _TESTS = [
65 {
f2d0fc68 66 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
2e3fd9ec 67 'info_dict': {
f2d0fc68 68 'id': 'b039d07m',
b1ea6802 69 'ext': 'flv',
acc86c9a 70 'title': 'Kaleidoscope, Leonard Cohen',
c4914185 71 'description': 'The Canadian poet and songwriter reflects on his musical career.',
2e3fd9ec
S
72 },
73 'params': {
b1ea6802 74 # rtmp download
2e3fd9ec
S
75 'skip_download': True,
76 }
082c6c86 77 },
2e3fd9ec
S
78 {
79 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
80 'info_dict': {
81 'id': 'b00yng1d',
82 'ext': 'flv',
83 'title': 'The Man in Black: Series 3: The Printed Name',
84 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
85 'duration': 1800,
86 },
87 'params': {
88 # rtmp download
89 'skip_download': True,
c7f0177f
S
90 },
91 'skip': 'Episode is no longer available on BBC iPlayer Radio',
2e3fd9ec
S
92 },
93 {
94 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
95 'info_dict': {
96 'id': 'b00yng1d',
97 'ext': 'flv',
17968e44 98 'title': 'The Voice UK: Series 3: Blind Auditions 5',
611c1dd9 99 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
17968e44 100 'duration': 5100,
2e3fd9ec
S
101 },
102 'params': {
103 # rtmp download
104 'skip_download': True,
105 },
b1ea6802 106 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
c056efa2
S
107 },
108 {
109 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
110 'info_dict': {
111 'id': 'b03k3pb7',
112 'ext': 'flv',
113 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
114 'description': '2. Invasion',
115 'duration': 3600,
116 },
117 'params': {
118 # rtmp download
119 'skip_download': True,
120 },
b1ea6802 121 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
ae6986fb
S
122 }, {
123 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
124 'info_dict': {
125 'id': 'b04v209v',
126 'ext': 'flv',
127 'title': 'Pete Tong, The Essential New Tune Special',
128 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
129 'duration': 10800,
130 },
131 'params': {
132 # rtmp download
133 'skip_download': True,
a3ef0e1c
YCH
134 },
135 'skip': 'Episode is no longer available on BBC iPlayer Radio',
c7e67594 136 }, {
5aa535c3 137 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
c7e67594
S
138 'note': 'Audio',
139 'info_dict': {
5aa535c3 140 'id': 'p022h44j',
b1ea6802 141 'ext': 'flv',
5aa535c3
S
142 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
143 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
144 'duration': 227,
c7e67594
S
145 },
146 'params': {
b1ea6802 147 # rtmp download
c7e67594
S
148 'skip_download': True,
149 }
150 }, {
151 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
152 'note': 'Video',
153 'info_dict': {
154 'id': 'p025c103',
b1ea6802 155 'ext': 'flv',
c7e67594
S
156 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
157 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
158 'duration': 226,
159 },
160 'params': {
b1ea6802 161 # rtmp download
c7e67594
S
162 'skip_download': True,
163 }
e68ae99a
S
164 }, {
165 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
166 'info_dict': {
167 'id': 'p02n76xf',
168 'ext': 'flv',
169 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
170 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
171 'duration': 3540,
172 },
173 'params': {
174 # rtmp download
175 'skip_download': True,
176 },
b1ea6802 177 'skip': 'geolocation',
25fa8d66
YCH
178 }, {
179 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
180 'info_dict': {
181 'id': 'b05zmgw1',
182 'ext': 'flv',
183 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
184 'title': 'Royal Academy Summer Exhibition',
185 'duration': 3540,
186 },
187 'params': {
188 # rtmp download
189 'skip_download': True,
190 },
b1ea6802 191 'skip': 'geolocation',
54914380
S
192 }, {
193 # iptv-all mediaset fails with geolocation however there is no geo restriction
194 # for this programme at all
5aa535c3 195 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
54914380 196 'info_dict': {
5aa535c3 197 'id': 'b06rkms3',
54914380 198 'ext': 'flv',
5aa535c3
S
199 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
200 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
54914380
S
201 },
202 'params': {
203 # rtmp download
204 'skip_download': True,
205 },
b1ea6802 206 'skip': 'Now it\'s really geo-restricted',
1ac6e794 207 }, {
067aa17e 208 # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
1ac6e794
S
209 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
210 'info_dict': {
211 'id': 'p028bfkj',
b1ea6802 212 'ext': 'flv',
1ac6e794
S
213 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
214 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
215 },
216 'params': {
b1ea6802 217 # rtmp download
1ac6e794
S
218 'skip_download': True,
219 },
b72305f0
J
220 }, {
221 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
222 'note': 'Audio',
223 'info_dict': {
224 'id': 'm0007jz9',
225 'ext': 'mp4',
226 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
227 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
228 'duration': 9840,
229 },
230 'params': {
231 # rtmp download
232 'skip_download': True,
233 }
31763975
S
234 }, {
235 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
236 'only_matching': True,
c7e67594
S
237 }, {
238 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
239 'only_matching': True,
0692ef86
S
240 }, {
241 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
242 'only_matching': True,
f20a11ed
S
243 }, {
244 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
245 'only_matching': True,
72d256c4
S
246 }, {
247 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
248 'only_matching': True,
53647dfd
S
249 }, {
250 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
251 'only_matching': True,
6f356cbb
S
252 }, {
253 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
254 'only_matching': True,
255 }, {
256 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
257 'only_matching': True,
72d256c4 258 }]
2e3fd9ec 259
def _perform_login(self, username, password):
    """Sign in to the BBC account service with the given credentials.

    Downloads the sign-in page, re-submits its hidden inputs together with
    ``username`` and ``password`` to the form's action URL (falling back to
    the sign-in URL itself), and raises ExtractorError when the final
    response URL still contains the sign-in URL.
    """
    signin_page = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading signin page')

    # Start from the page's hidden inputs, then set the credentials on top.
    form_data = self._hidden_inputs(signin_page)
    form_data['username'] = username
    form_data['password'] = password

    form_action = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
        'post url', default=self._LOGIN_URL, group='url')
    post_url = urljoin(self._LOGIN_URL, form_action)

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(form_data),
        headers={'Referer': self._LOGIN_URL})

    # Ending up anywhere other than the sign-in URL is treated as success.
    if self._LOGIN_URL not in urlh.geturl():
        return

    error = clean_html(get_element_by_class('form-message', response))
    if error:
        raise ExtractorError(
            'Unable to login: %s' % error, expected=True)
    raise ExtractorError('Unable to log in')
285
d12a1a47
S
class MediaSelectionError(Exception):
    """Error carrying the identifier reported by a media selection.

    The value passed to the constructor is stored unchanged in ``id``.
    """

    def __init__(self, id):
        # The parameter name shadows the builtin, but it is part of the
        # existing constructor signature and is therefore kept.
        self.id = id
289
2e3fd9ec
S
def _extract_asx_playlist(self, connection, programme_id):
    """Return the ``href`` of every ``Entry/ref`` in the connection's ASX playlist.

    The playlist is downloaded as XML from the connection's own ``href``.
    """
    asx_url = connection.get('href')
    asx_doc = self._download_xml(asx_url, programme_id, 'Downloading ASX playlist')
    hrefs = []
    for ref in asx_doc.findall('./Entry/ref'):
        hrefs.append(ref.get('href'))
    return hrefs
293
2e3fd9ec 294 def _extract_items(self, playlist):
e6174ee9
S
295 return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
296
2e3fd9ec 297 def _extract_medias(self, media_selection):
29f7c58a 298 error = media_selection.get('result')
299 if error:
300 raise BBCCoUkIE.MediaSelectionError(error)
301 return media_selection.get('media') or []
2e3fd9ec
S
302
303 def _extract_connections(self, media):
29f7c58a 304 return media.get('connection') or []
2e3fd9ec 305
def _get_subtitles(self, media, programme_id):
    """Build the subtitles dict for a captions media entry.

    Tries each connection in turn and stops at the first one whose href
    passes ``url_or_none`` and whose download yields an XML element.

    Returns ``{'en': [{'url': ..., 'ext': 'ttml'}]}`` for that connection,
    or an empty dict when no connection qualifies.
    """
    subtitles = {}
    for connection in self._extract_connections(media):
        cc_url = url_or_none(connection.get('href'))
        if not cc_url:
            continue
        captions = self._download_xml(
            cc_url, programme_id, 'Downloading captions', fatal=False)
        # Skip connections whose download did not produce a parsed XML element.
        if not isinstance(captions, xml.etree.ElementTree.Element):
            continue
        subtitles['en'] = [
            {
                # Emit the URL that was actually validated and fetched above,
                # not the raw href it was derived from.
                'url': cc_url,
                'ext': 'ttml',
            },
        ]
        break
    return subtitles
082c6c86 324
d12a1a47
S
def _raise_extractor_error(self, media_selection_error):
    """Raise an expected ExtractorError naming this extractor and the error's ``id``."""
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    raise ExtractorError(message, expected=True)
329
def _download_media_selector(self, programme_id):
    """Query each media set in ``_MEDIA_SETS`` until one yields a selection.

    Returns the result of ``_download_media_selector_url`` for the first
    media set that does not raise MediaSelectionError. The error ids
    'notukerror', 'geolocation' and 'selectionunavailable' cause the next
    media set to be tried; any other id is raised immediately as an
    ExtractorError. When every media set fails, the last error is raised.
    """
    retryable_ids = ('notukerror', 'geolocation', 'selectionunavailable')
    last_exception = None
    for media_set in self._MEDIA_SETS:
        selector_url = self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id)
        try:
            return self._download_media_selector_url(selector_url, programme_id)
        except BBCCoUkIE.MediaSelectionError as e:
            if e.id not in retryable_ids:
                self._raise_extractor_error(e)
            # Remember the failure and move on to the next media set.
            last_exception = e
    self._raise_extractor_error(last_exception)
9afa1770
S
342
def _download_media_selector_url(self, url, programme_id=None):
    """Download the media selection JSON at ``url`` and process it.

    The request is made with ``expected_status=(403, 404)``. Returns the
    result of ``_process_media_selector`` for the downloaded JSON.
    """
    selection = self._download_json(
        url, programme_id, 'Downloading media selection JSON',
        expected_status=(403, 404))
    processed = self._process_media_selector(selection, programme_id)
    return processed
082c6c86 348
def _process_media_selector(self, media_selection, programme_id):
    """Turn a media selection into a ``(formats, subtitles)`` tuple.

    'video' and 'audio' media entries contribute formats for each of their
    connections (ASX refs, DASH, HLS, HDS, direct HTTP(S) or RTMP);
    'captions' entries are handed to ``extract_subtitles``. A connection
    whose href was already seen is skipped. ``subtitles`` stays None when
    no captions entry is present.
    """
    formats = []
    subtitles = None
    seen_hrefs = []

    for media in self._extract_medias(media_selection):
        kind = media.get('kind')
        if kind == 'captions':
            subtitles = self.extract_subtitles(media, programme_id)
            continue
        if kind not in ('video', 'audio'):
            continue

        bitrate = int_or_none(media.get('bitrate'))
        encoding = media.get('encoding')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        file_size = int_or_none(media.get('media_file_size'))

        for connection in self._extract_connections(media):
            href = connection.get('href')
            if href in seen_hrefs:
                continue
            if href:
                seen_hrefs.append(href)
            conn_kind = connection.get('kind')
            protocol = connection.get('protocol')
            supplier = connection.get('supplier')
            transfer_format = connection.get('transferFormat')
            format_id = supplier or conn_kind or protocol

            if supplier == 'asx':
                # ASX playlist: one format per referenced entry
                asx_refs = self._extract_asx_playlist(connection, programme_id)
                for i, ref in enumerate(asx_refs):
                    formats.append({
                        'url': ref,
                        'format_id': 'ref%s_%s' % (i, format_id),
                    })
                continue

            if transfer_format == 'dash':
                formats.extend(self._extract_mpd_formats(
                    href, programme_id, mpd_id=format_id, fatal=False))
                continue

            if transfer_format == 'hls':
                # TODO: let expected_status be passed into _extract_xxx_formats() instead
                try:
                    hls_formats = self._extract_m3u8_formats(
                        href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False)
                except ExtractorError as e:
                    # Only HTTP 403/404 failures are tolerated here.
                    if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
                            and e.exc_info[1].code in (403, 404)):
                        raise
                    hls_formats = []
                formats.extend(hls_formats)
                continue

            if transfer_format == 'hds':
                formats.extend(self._extract_f4m_formats(
                    href, programme_id, f4m_id=format_id, fatal=False))
                continue

            # Remaining connections become a single direct or RTMP format.
            if not supplier and bitrate:
                format_id += '-%d' % bitrate
            fmt = {
                'format_id': format_id,
                'filesize': file_size,
            }
            if kind == 'video':
                fmt.update({
                    'width': width,
                    'height': height,
                    'tbr': bitrate,
                    'vcodec': encoding,
                })
            else:
                fmt.update({
                    'abr': bitrate,
                    'acodec': encoding,
                    'vcodec': 'none',
                })

            if protocol in ('http', 'https'):
                # Direct link
                fmt['url'] = href
            elif protocol == 'rtmp':
                application = connection.get('application', 'ondemand')
                auth_string = connection.get('authString')
                identifier = connection.get('identifier')
                server = connection.get('server')
                fmt.update({
                    'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                    'play_path': identifier,
                    'app': '%s?%s' % (application, auth_string),
                    'page_url': 'http://www.bbc.co.uk',
                    'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                    'rtmp_live': False,
                    'ext': 'flv',
                })
            else:
                # Any other protocol yields no format.
                continue
            formats.append(fmt)

    return formats, subtitles
2e3fd9ec 443
ae6986fb
S
def _download_playlist(self, playlist_id):
    """Collect formats and subtitles from the programmes playlist JSON.

    Every 'programme'/'radioProgramme' item of every available version is
    resolved through the media selector; its formats are annotated with
    the version's types and gathered into one list. The returned
    ``programme_id``, ``title``, ``description`` and ``duration`` are
    those of the last version/item processed.

    Returns ``(programme_id, title, description, duration, formats,
    subtitles)``. If an ExtractorError caused by an HTTP 404 is raised
    along the way, the legacy playlist is used instead.
    """
    try:
        playlist = self._download_json(
            'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
            playlist_id, 'Downloading playlist JSON')
        formats = []
        subtitles = {}

        for version in playlist.get('allAvailableVersions', []):
            smp_config = version['smpConfig']
            title = smp_config['title']
            description = smp_config['summary']
            for item in smp_config['items']:
                if item['kind'] not in ('programme', 'radioProgramme'):
                    continue
                programme_id = item.get('vpid')
                duration = int_or_none(item.get('duration'))
                version_formats, version_subtitles = self._download_media_selector(programme_id)
                types = version['types']
                for fmt in version_formats:
                    fmt['format_note'] = ', '.join(types)
                    if any('AudioDescribed' in t for t in types):
                        fmt['language_preference'] = -10
                formats.extend(version_formats)
                for lang, tracks in (version_subtitles or {}).items():
                    subtitles.setdefault(lang, []).extend(tracks)

        return programme_id, title, description, duration, formats, subtitles
    except ExtractorError as ee:
        if not isinstance(ee.cause, compat_HTTPError) or ee.cause.code != 404:
            raise

    # fallback to legacy playlist
    return self._process_legacy_playlist(playlist_id)
479
def _process_legacy_playlist_url(self, url, display_id):
    """Download the legacy playlist at ``url`` and extract its data."""
    legacy_playlist = self._download_legacy_playlist_url(url, display_id)
    extracted = self._extract_from_legacy_playlist(legacy_playlist, display_id)
    return extracted
483
def _process_legacy_playlist(self, playlist_id):
    """Process the iPlayer legacy playlist addressed by ``playlist_id``."""
    legacy_url = 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id
    return self._process_legacy_playlist_url(legacy_url, playlist_id)
487
def _download_legacy_playlist_url(self, url, playlist_id=None):
    """Download ``url`` as XML and return the result of ``_download_xml``."""
    note = 'Downloading legacy playlist XML'
    return self._download_xml(url, playlist_id, note)
ae6986fb 491
def _extract_from_legacy_playlist(self, playlist, playlist_id):
    """Extract programme data from a legacy EMP playlist XML element.

    Raises an expected ExtractorError when the playlist contains a
    ``noItems`` element, with a message derived from its ``reason``.

    Otherwise every 'programme'/'radioProgramme' item is processed: if a
    programme id can be found on the item or its ``mediator`` child, the
    media selector is queried for it; if not, the item itself is processed
    as a media selection and ``playlist_id`` is used as the id. Values
    from the last processed item are the ones returned.

    Returns ``(programme_id, title, description, duration, formats,
    subtitles)``.
    """
    ns = self._EMP_PLAYLIST_NS

    no_items = playlist.find('./{%s}noItems' % ns)
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % playlist_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % playlist_id
        elif reason == 'noMedia':
            msg = 'Episode %s is not currently available' % playlist_id
        else:
            msg = 'Episode %s is not available: %s' % (playlist_id, reason)
        raise ExtractorError(msg, expected=True)

    def get_from_attributes(element):
        # First attribute whose value looks like a programme id, else None.
        for attr in ('identifier', 'group'):
            value = element.get(attr)
            if value and re.match(r'^[pb][\da-z]{7}$', value):
                return value
        return None

    def get_programme_id(item):
        # Check the item's own attributes first. The result of this lookup
        # used to be discarded, so only the mediator was ever consulted.
        own_id = get_from_attributes(item)
        if own_id:
            return own_id
        mediator = item.find('./{%s}mediator' % ns)
        if mediator is not None:
            return get_from_attributes(mediator)
        return None

    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind not in ('programme', 'radioProgramme'):
            continue
        title = playlist.find('./{%s}title' % ns).text
        description_el = playlist.find('./{%s}summary' % ns)
        description = description_el.text if description_el is not None else None

        programme_id = get_programme_id(item)
        duration = int_or_none(item.get('duration'))

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
        else:
            # No usable id: treat the item itself as the media selection.
            formats, subtitles = self._process_media_selector(item, playlist_id)
            programme_id = playlist_id

    return programme_id, title, description, duration, formats, subtitles
535
c056efa2
S
def _real_extract(self, url):
    """Extract a single programme from a bbc.co.uk page.

    Raises an expected ExtractorError when the page shows a player
    message. The programme id is taken from the ``mediator.bind`` player
    JSON or, failing that, from a ``"vpid"`` entry in the page. When no id
    is found, everything is obtained from the playlist for the id matched
    in the URL.

    Returns an info dict with id, title, description, thumbnail, duration,
    formats and subtitles.
    """
    group_id = self._match_id(url)

    webpage = self._download_webpage(url, group_id, 'Downloading video page')

    page_error = self._search_regex(
        r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
        webpage, 'error', default=None)
    if page_error:
        raise ExtractorError(page_error, expected=True)

    programme_id = None
    duration = None

    player_json = self._search_regex(
        r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
        webpage, 'player', default=None)

    if player_json:
        player = self._parse_json(player_json, group_id).get('player', {})
        duration = int_or_none(player.get('duration'))
        programme_id = player.get('vpid')

    if not programme_id:
        vpid_pattern = r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX
        programme_id = self._search_regex(
            vpid_pattern, webpage, 'vpid', fatal=False, default=None)

    if not programme_id:
        # Nothing usable on the page itself: resolve through the playlist.
        programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
    else:
        formats, subtitles = self._download_media_selector(programme_id)
        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
            (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
             r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
        description = self._search_regex(
            (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
             r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
            webpage, 'description', default=None) or self._html_search_meta('description', webpage)

    self._sort_formats(formats)

    thumbnail = self._og_search_thumbnail(webpage, default=None)
    return {
        'id': programme_id,
        'title': title,
        'description': description,
        'thumbnail': thumbnail,
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }
10273d6e 588
589
9afa1770
S
590class BBCIE(BBCCoUkIE):
591 IE_NAME = 'bbc'
592 IE_DESC = 'BBC'
593 _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
10273d6e 594
29f7c58a 595 _MEDIA_SETS = [
29f7c58a 596 'pc',
2d997542 597 'mobile-tablet-main',
d12a1a47 598 ]
10273d6e 599
600 _TESTS = [{
6a747190 601 # article with multiple videos embedded with data-playable containing vpids
10273d6e 602 'url': 'http://www.bbc.com/news/world-europe-32668511',
603 'info_dict': {
604 'id': 'world-europe-32668511',
acc86c9a 605 'title': 'Russia stages massive WW2 parade',
9afa1770 606 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
10273d6e 607 },
608 'playlist_count': 2,
a3bfddfa 609 }, {
6a747190 610 # article with multiple videos embedded with data-playable (more videos)
10273d6e 611 'url': 'http://www.bbc.com/news/business-28299555',
612 'info_dict': {
613 'id': 'business-28299555',
614 'title': 'Farnborough Airshow: Video highlights',
9afa1770 615 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
10273d6e 616 },
617 'playlist_count': 9,
9afa1770 618 'skip': 'Save time',
88ed52ae
S
619 }, {
620 # article with multiple videos embedded with `new SMP()`
6a747190 621 # broken
88ed52ae
S
622 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
623 'info_dict': {
624 'id': '3662a707-0af9-3149-963f-47bea720b460',
b7d7674f 625 'title': 'BUGGER',
88ed52ae
S
626 },
627 'playlist_count': 18,
a3bfddfa 628 }, {
6a747190 629 # single video embedded with data-playable containing vpid
10273d6e 630 'url': 'http://www.bbc.com/news/world-europe-32041533',
10273d6e 631 'info_dict': {
632 'id': 'p02mprgb',
55ebae26 633 'ext': 'mp4',
10273d6e 634 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
55ebae26 635 'description': 'md5:2868290467291b37feda7863f7a83f54',
10273d6e 636 'duration': 47,
9afa1770 637 'timestamp': 1427219242,
da92eeae 638 'upload_date': '20150324',
10273d6e 639 },
640 'params': {
9afa1770 641 # rtmp download
10273d6e 642 'skip_download': True,
643 }
a3bfddfa 644 }, {
6a747190
S
645 # article with single video embedded with data-playable containing XML playlist
646 # with direct video links as progressiveDownloadUrl (for now these are extracted)
647 # and playlist with f4m and m3u8 as streamingUrl
de939d89 648 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
de939d89 649 'info_dict': {
9afa1770 650 'id': '150615_telabyad_kentin_cogu',
de939d89 651 'ext': 'mp4',
ad152e2d 652 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
05087d1b 653 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
9afa1770 654 'timestamp': 1434397334,
da92eeae 655 'upload_date': '20150615',
de939d89 656 },
657 'params': {
658 'skip_download': True,
659 }
c936d8cc 660 }, {
6a747190 661 # single video embedded with data-playable containing XML playlists (regional section)
de939d89 662 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 663 'info_dict': {
9afa1770 664 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 665 'ext': 'mp4',
9afa1770 666 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
05087d1b 667 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
9afa1770 668 'timestamp': 1434713142,
da92eeae 669 'upload_date': '20150619',
de939d89 670 },
671 'params': {
672 'skip_download': True,
673 }
a346b1ff
S
674 }, {
675 # single video from video playlist embedded with vxp-playlist-data JSON
676 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
677 'info_dict': {
678 'id': 'p02w6qjc',
55ebae26 679 'ext': 'mp4',
a346b1ff
S
680 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
681 'duration': 56,
0bc4ee60 682 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
a346b1ff
S
683 },
684 'params': {
685 'skip_download': True,
686 }
9afa1770
S
687 }, {
688 # single video story with digitalData
689 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
690 'info_dict': {
691 'id': 'p02q6gc4',
692 'ext': 'flv',
693 'title': 'Sri Lanka’s spicy secret',
694 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
695 'timestamp': 1437674293,
696 'upload_date': '20150723',
697 },
698 'params': {
699 # rtmp download
700 'skip_download': True,
701 }
702 }, {
703 # single video story without digitalData
704 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
705 'info_dict': {
706 'id': 'p018zqqg',
55ebae26 707 'ext': 'mp4',
9afa1770
S
708 'title': 'Hyundai Santa Fe Sport: Rock star',
709 'description': 'md5:b042a26142c4154a6e472933cf20793d',
ae8bdfd1
S
710 'timestamp': 1415867444,
711 'upload_date': '20141113',
9afa1770
S
712 },
713 'params': {
714 # rtmp download
715 'skip_download': True,
716 }
9fb64c04
S
717 }, {
718 # single video embedded with Morph
719 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
720 'info_dict': {
721 'id': 'p041vhd0',
722 'ext': 'mp4',
723 'title': "Nigeria v Japan - Men's First Round",
724 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
725 'duration': 7980,
726 'uploader': 'BBC Sport',
727 'uploader_id': 'bbc_sport',
728 },
729 'params': {
730 # m3u8 download
731 'skip_download': True,
9fb64c04
S
732 },
733 'skip': 'Georestricted to UK',
9afa1770 734 }, {
6a747190 735 # single video with playlist.sxml URL in playlist param
9afa1770
S
736 'url': 'http://www.bbc.com/sport/0/football/33653409',
737 'info_dict': {
738 'id': 'p02xycnp',
55ebae26 739 'ext': 'mp4',
9afa1770 740 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
7033bc1a 741 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
9afa1770
S
742 'duration': 140,
743 },
744 'params': {
745 # rtmp download
746 'skip_download': True,
747 }
b5d48cb1 748 }, {
6a747190 749 # article with multiple videos embedded with playlist.sxml in playlist param
b5d48cb1
S
750 'url': 'http://www.bbc.com/sport/0/football/34475836',
751 'info_dict': {
752 'id': '34475836',
450b233c 753 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
8c65e4a5 754 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
b5d48cb1
S
755 },
756 'playlist_count': 3,
450b233c
S
757 }, {
758 # school report article with single video
759 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
760 'info_dict': {
761 'id': '35744779',
762 'title': 'School which breaks down barriers in Jerusalem',
763 },
764 'playlist_count': 1,
9afa1770
S
765 }, {
766 # single video with playlist URL from weather section
767 'url': 'http://www.bbc.com/weather/features/33601775',
768 'only_matching': True,
769 }, {
770 # custom redirection to www.bbc.com
1bdae7d3 771 # also, video with window.__INITIAL_DATA__
9afa1770 772 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
1bdae7d3 773 'info_dict': {
774 'id': 'p02xzws1',
775 'ext': 'mp4',
776 'title': "Pluto may have 'nitrogen glaciers'",
777 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
778 'thumbnail': r're:https?://.+/.+\.jpg',
779 'timestamp': 1437785037,
780 'upload_date': '20150725',
781 },
50e93e03 782 }, {
783 # video with window.__INITIAL_DATA__ and value as JSON string
784 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
785 'info_dict': {
786 'id': 'p0b71qth',
787 'ext': 'mp4',
788 'title': 'Why France is making this woman a national hero',
789 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
790 'thumbnail': r're:https?://.+/.+\.jpg',
791 'timestamp': 1638230731,
792 'upload_date': '20211130',
793 },
a1cf3e38
S
794 }, {
795 # single video article embedded with data-media-vpid
796 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
797 'only_matching': True,
6d155707 798 }, {
50e93e03 799 # bbcthreeConfig
6d155707
S
800 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
801 'info_dict': {
802 'id': 'p06556y7',
803 'ext': 'mp4',
50e93e03 804 'title': 'Things Not To Say to people that live on council estates',
805 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
806 'duration': 360,
807 'thumbnail': r're:https?://.+/.+\.jpg',
6d155707 808 },
b96b4be4
RA
809 }, {
810 # window.__PRELOADED_STATE__
811 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
812 'info_dict': {
813 'id': 'b0b9z4vz',
814 'ext': 'mp4',
815 'title': 'Prom 6: An American in Paris and Turangalila',
816 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
817 'uploader': 'Radio 3',
818 'uploader_id': 'bbc_radio_three',
819 },
373941c5
S
820 }, {
821 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
822 'info_dict': {
823 'id': 'p06w9tws',
824 'ext': 'mp4',
825 'title': 'md5:2fabf12a726603193a2879a055f72514',
826 'description': 'Learn English words and phrases from this story',
827 },
828 'add_ie': [BBCCoUkIE.ie_key()],
3721515b 829 }, {
830 # BBC Reel
831 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
832 'info_dict': {
833 'id': 'p07c6sb9',
834 'ext': 'mp4',
835 'title': 'How positive thinking is harming your happiness',
836 'alt_title': 'The downsides of positive thinking',
837 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
838 'duration': 235,
839 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
840 'upload_date': '20190604',
841 'categories': ['Psychology'],
842 },
10273d6e 843 }]
844
9afa1770
S
@classmethod
def suitable(cls, url):
    """Tell whether this generic BBC extractor should handle *url*.

    The more specific BBC extractors take precedence: as soon as one of
    them claims the URL this extractor declines it, otherwise the decision
    is left to the inherited URL matching.
    """
    more_specific_ies = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
    for ie in more_specific_ies:
        if ie.suitable(url):
            return False
    return super(BBCIE, cls).suitable(url)
9afa1770
S
850
851 def _extract_from_media_meta(self, media_meta, video_id):
852 # Direct links to media in media metadata (e.g.
853 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
854 # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
855 source_files = media_meta.get('sourceFiles')
856 if source_files:
857 return [{
858 'url': f['url'],
859 'format_id': format_id,
860 'ext': f.get('encoding'),
861 'tbr': float_or_none(f.get('bitrate'), 1000),
862 'filesize': int_or_none(f.get('filesize')),
863 } for format_id, f in source_files.items() if f.get('url')], []
864
865 programme_id = media_meta.get('externalId')
866 if programme_id:
867 return self._download_media_selector(programme_id)
868
869 # Process playlist.sxml as legacy playlist
870 href = media_meta.get('href')
871 if href:
872 playlist = self._download_legacy_playlist_url(href)
873 _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
874 return formats, subtitles
875
876 return [], []
877
baf39a1a
S
878 def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
879 programme_id, title, description, duration, formats, subtitles = \
880 self._process_legacy_playlist_url(url, playlist_id)
881 self._sort_formats(formats)
882 return {
883 'id': programme_id,
884 'title': title,
885 'description': description,
886 'duration': duration,
887 'timestamp': timestamp,
888 'formats': formats,
889 'subtitles': subtitles,
890 }
891
def _real_extract(self, url):
    """Extract the media embedded in a generic BBC page.

    The page is probed with a chain of embed-specific strategies, tried in
    this order; the first one that finds media returns:

    1. legacy playlist.sxml embeds and ``data-playable`` attributes
    2. a ``data-pid`` video group (delegated to BBCCoUkIE)
    3. a single vpid found directly in the markup
    4. BBC Reel ``initial-data`` JSON
    5. Morph payloads
    6. ``window.__PRELOADED_STATE__``
    7. ``bbcthreeConfig``
    8. ``window.__INITIAL_DATA__`` (returns a playlist whenever the state
       object is present and non-empty, even if no media was collected)
    9. SMP / ``setPlaylist`` embed URLs (delegated to BBCCoUkIE)
    10. ``data-media-meta``, ``mediaAssetPage.init`` and vxp playlist data

    All page-supplied JSON is treated as untrusted: missing keys, explicit
    nulls and unparsable blobs make a strategy fall through to the next one
    instead of aborting with KeyError/AttributeError/TypeError.
    """
    playlist_id = self._match_id(url)

    webpage = self._download_webpage(url, playlist_id)

    json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
    timestamp = json_ld_info.get('timestamp')

    playlist_title = json_ld_info.get('title')
    if not playlist_title:
        playlist_title = (self._og_search_title(webpage, default=None)
                          or self._html_extract_title(webpage, 'playlist title', default=None))
        if playlist_title:
            # drop the trailing " - BBC ..." site suffix
            playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()

    playlist_description = json_ld_info.get(
        'description') or self._og_search_description(webpage, default=None)

    if not timestamp:
        timestamp = parse_iso8601(self._search_regex(
            [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
             r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
             r'"datePublished":\s*"([^"]+)'],
            webpage, 'date', default=None))

    entries = []

    # article with multiple videos embedded with playlist.sxml (e.g.
    # http://www.bbc.com/sport/0/football/34475836)
    playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
    playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
    if playlists:
        entries = [
            self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
            for playlist_url in playlists]

    # news article with multiple videos embedded with data-playable
    data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
    if data_playables:
        for _, data_playable_json in data_playables:
            data_playable = self._parse_json(
                unescapeHTML(data_playable_json), playlist_id, fatal=False)
            if not data_playable:
                continue
            settings = data_playable.get('settings', {})
            if settings:
                # data-playable with video vpid in settings.playlistObject.items (e.g.
                # http://www.bbc.com/news/world-us-canada-34473351)
                playlist_object = settings.get('playlistObject', {})
                if playlist_object:
                    items = playlist_object.get('items')
                    if items and isinstance(items, list):
                        first_item = items[0] if isinstance(items[0], dict) else {}
                        programme_id = first_item.get('vpid')
                        if not programme_id:
                            # without a vpid there is nothing to ask the
                            # media selector for
                            continue
                        # the embed title is optional; fall back to the page title
                        title = playlist_object.get('title') or playlist_title
                        description = playlist_object.get('summary')
                        duration = int_or_none(first_item.get('duration'))
                        formats, subtitles = self._download_media_selector(programme_id)
                        self._sort_formats(formats)
                        entries.append({
                            'id': programme_id,
                            'title': title,
                            'description': description,
                            'timestamp': timestamp,
                            'duration': duration,
                            'formats': formats,
                            'subtitles': subtitles,
                        })
            else:
                # data-playable without vpid but with a playlist.sxml URLs
                # in otherSettings.playlist (e.g.
                # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
                playlist = (data_playable.get('otherSettings') or {}).get('playlist', {})
                if playlist:
                    entry = None
                    for key in ('streaming', 'progressiveDownload'):
                        playlist_url = playlist.get('%sUrl' % key)
                        if not playlist_url:
                            continue
                        try:
                            info = self._extract_from_playlist_sxml(
                                playlist_url, playlist_id, timestamp)
                            if not entry:
                                entry = info
                            else:
                                # merge the second variant into the first one
                                entry['title'] = info['title']
                                entry['formats'].extend(info['formats'])
                        except ExtractorError as e:
                            # Some playlist URL may fail with 500, at the same time
                            # the other one may work fine (e.g.
                            # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
                            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
                                continue
                            raise
                    if entry:
                        self._sort_formats(entry['formats'])
                        entries.append(entry)

    if entries:
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
    group_id = self._search_regex(
        r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
        webpage, 'group id', default=None)
    if group_id:
        return self.url_result(
            'https://www.bbc.co.uk/programmes/%s' % group_id,
            ie=BBCCoUkIE.ie_key())

    # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
    programme_id = self._search_regex(
        [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
         r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
         r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
        webpage, 'vpid', default=None)

    if programme_id:
        formats, subtitles = self._download_media_selector(programme_id)
        self._sort_formats(formats)
        # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
        # or unparsable, in which case the non-fatal parse yields None
        digital_data = self._parse_json(
            self._search_regex(
                r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
            programme_id, fatal=False) or {}
        page_info = try_get(digital_data, lambda x: x['page']['pageInfo'], dict) or {}
        title = page_info.get('pageName') or self._og_search_title(webpage)
        description = page_info.get('description') or self._og_search_description(webpage)
        timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        }

    # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
    initial_data = self._parse_json(self._html_search_regex(
        r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
        webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
    if initial_data:
        init_data = try_get(
            initial_data, lambda x: x['initData']['items'][0], dict) or {}
        smp_data = init_data.get('smpData') or {}
        clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
        version_id = clip_data.get('versionID')
        if version_id:
            # the player title is optional; fall back to the page title
            title = smp_data.get('title') or playlist_title
            formats, subtitles = self._download_media_selector(version_id)
            self._sort_formats(formats)
            image_url = smp_data.get('holdingImageURL')
            display_date = init_data.get('displayDate')
            topic_title = init_data.get('topicTitle')

            return {
                'id': version_id,
                'title': title,
                'formats': formats,
                'alt_title': init_data.get('shortTitle'),
                'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
                'description': smp_data.get('summary') or init_data.get('shortSummary'),
                'upload_date': display_date.replace('-', '') if display_date else None,
                'subtitles': subtitles,
                'duration': int_or_none(clip_data.get('duration')),
                'categories': [topic_title] if topic_title else None,
            }

    # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
    # There are several setPayload calls may be present but the video
    # seems to be always related to the first one
    morph_payload = self._parse_json(
        self._search_regex(
            r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
            webpage, 'morph payload', default='{}'),
        playlist_id, fatal=False)
    if morph_payload:
        components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
        for component in components:
            if not isinstance(component, dict):
                continue
            lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
            if not lead_media:
                continue
            identifiers = lead_media.get('identifiers')
            if not identifiers or not isinstance(identifiers, dict):
                continue
            programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
            if not programme_id:
                continue
            title = lead_media.get('title') or self._og_search_title(webpage)
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            description = lead_media.get('summary')
            uploader = lead_media.get('masterBrand')
            uploader_id = lead_media.get('mid')
            duration = None
            duration_d = lead_media.get('duration')
            if isinstance(duration_d, dict):
                duration = parse_duration(dict_get(
                    duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'duration': duration,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'formats': formats,
                'subtitles': subtitles,
            }

    preload_state = self._parse_json(self._search_regex(
        r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
        'preload state', default='{}'), playlist_id, fatal=False)
    if preload_state:
        # try_get also copes with explicit nulls, which .get(key, {}) does not
        current_programme = try_get(
            preload_state, lambda x: x['programmes']['current'], dict) or {}
        programme_id = current_programme.get('id')
        if current_programme and programme_id and current_programme.get('type') == 'playable_item':
            title = try_get(
                current_programme, lambda x: x['titles']['tertiary']) or playlist_title
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            synopses = current_programme.get('synopses') or {}
            network = current_programme.get('network') or {}
            duration = int_or_none(try_get(
                current_programme, lambda x: x['duration']['value']))
            thumbnail = None
            image_url = current_programme.get('image_url')
            if image_url:
                thumbnail = image_url.replace('{recipe}', 'raw')
            return {
                'id': programme_id,
                'title': title,
                'description': dict_get(synopses, ('long', 'medium', 'short')),
                'thumbnail': thumbnail,
                'duration': duration,
                'uploader': network.get('short_title'),
                'uploader_id': network.get('id'),
                'formats': formats,
                'subtitles': subtitles,
            }

    bbc3_config = self._parse_json(
        self._search_regex(
            r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
            'bbcthree config', default='{}'),
        playlist_id, transform_source=js_to_json, fatal=False) or {}
    payload = bbc3_config.get('payload') or {}
    if payload:
        clip = payload.get('currentClip') or {}
        clip_vpid = clip.get('vpid')
        clip_title = clip.get('title')
        if clip_vpid and clip_title:
            formats, subtitles = self._download_media_selector(clip_vpid)
            self._sort_formats(formats)
            return {
                'id': clip_vpid,
                'title': clip_title,
                'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
                'description': clip.get('description'),
                'duration': parse_duration(clip.get('duration')),
                'formats': formats,
                'subtitles': subtitles,
            }
        bbc3_playlist = try_get(
            payload, lambda x: x['content']['bbcMedia']['playlist'],
            dict)
        if bbc3_playlist:
            playlist_title = bbc3_playlist.get('title') or playlist_title
            thumbnail = bbc3_playlist.get('holdingImageURL')
            entries = []
            # 'items' may be absent or malformed
            for bbc3_item in try_get(bbc3_playlist, lambda x: x['items'], list) or []:
                if not isinstance(bbc3_item, dict):
                    continue
                programme_id = bbc3_item.get('versionID')
                if not programme_id:
                    continue
                formats, subtitles = self._download_media_selector(programme_id)
                self._sort_formats(formats)
                entries.append({
                    'id': programme_id,
                    'title': playlist_title,
                    'thumbnail': thumbnail,
                    'timestamp': timestamp,
                    'formats': formats,
                    'subtitles': subtitles,
                })
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)

    # window.__INITIAL_DATA__ comes either as a plain object literal or as a
    # JSON string literal that itself contains the JSON document
    initial_data = self._search_regex(
        r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
        'quoted preload state', default=None)
    if initial_data is None:
        # the default has to be a string: it is handed to the JSON parser below
        initial_data = self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
            'preload state', default='{}')
    else:
        # first pass unwraps the string literal; keep a parsable value
        # even when that pass fails
        initial_data = self._parse_json(
            initial_data, playlist_id, fatal=False) or '{}'
    initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
    if initial_data:
        def parse_media(media):
            # appends one entry per playable item of a media model to `entries`
            if not media:
                return
            for item in (try_get(media, lambda x: x['media']['items'], list) or []):
                item_id = item.get('id')
                item_title = item.get('title')
                if not (item_id and item_title):
                    continue
                formats, subtitles = self._download_media_selector(item_id)
                self._sort_formats(formats)
                item_desc = None
                blocks = try_get(media, lambda x: x['summary']['blocks'], list)
                if blocks:
                    summary = []
                    for block in blocks:
                        text = try_get(block, lambda x: x['model']['text'], compat_str)
                        if text:
                            summary.append(text)
                    if summary:
                        item_desc = '\n\n'.join(summary)
                item_time = None
                for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
                    if try_get(meta, lambda x: x['label']) == 'Published':
                        item_time = unified_timestamp(meta.get('timestamp'))
                        break
                entries.append({
                    'id': item_id,
                    'title': item_title,
                    'thumbnail': item.get('holdingImageUrl'),
                    'formats': formats,
                    'subtitles': subtitles,
                    'timestamp': item_time,
                    'description': strip_or_none(item_desc),
                })
        for resp in (initial_data.get('data') or {}).values():
            if not isinstance(resp, dict):
                continue
            name = resp.get('name')
            if name == 'media-experience':
                parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
            elif name == 'article':
                for block in (try_get(resp,
                                      (lambda x: x['data']['blocks'],
                                       lambda x: x['data']['content']['model']['blocks'],),
                                      list) or []):
                    if block.get('type') != 'media':
                        continue
                    parse_media(block.get('model'))
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)

    def extract_all(pattern):
        # every match of `pattern` that parses as JSON and is non-empty
        return list(filter(None, map(
            lambda s: self._parse_json(s, playlist_id, fatal=False),
            re.findall(pattern, webpage))))

    # Multiple video article (e.g.
    # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
    EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
    entries = []
    for match in extract_all(r'new\s+SMP\(({.+?})\)'):
        embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
        if embed_url and re.match(EMBED_URL, embed_url):
            entries.append(embed_url)
    entries.extend(re.findall(
        r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
    if entries:
        return self.playlist_result(
            [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
            playlist_id, playlist_title, playlist_description)

    # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
    medias = extract_all(r"data-media-meta='({[^']+})'")

    if not medias:
        # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
        media_asset = self._search_regex(
            r'mediaAssetPage\.init\(\s*({.+?}), "/',
            webpage, 'media asset', default=None)
        if media_asset:
            # the non-fatal parse yields None on malformed input
            media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False) or {}
            medias = []
            for video in media_asset_page.get('videos', {}).values():
                medias.extend(video.values())

    if not medias:
        # Multiple video playlist with single `now playing` entry (e.g.
        # http://www.bbc.com/news/video_and_audio/must_see/33767813)
        # This is the last resort, hence both lookups are fatal here
        vxp_playlist = self._parse_json(
            self._search_regex(
                r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
                webpage, 'playlist data'),
            playlist_id)
        playlist_medias = []
        for item in vxp_playlist:
            media = item.get('media')
            if not media:
                continue
            playlist_medias.append(media)
            # Download single video if found media with asset id matching the video id from URL
            if item.get('advert', {}).get('assetId') == playlist_id:
                medias = [media]
                break
        # Fallback to the whole playlist
        if not medias:
            medias = playlist_medias

    entries = []
    for num, media_meta in enumerate(medias, start=1):
        formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
        if not formats and not self.get_param('ignore_no_formats'):
            continue
        self._sort_formats(formats)

        video_id = media_meta.get('externalId')
        if not video_id:
            video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)

        title = media_meta.get('caption')
        if not title:
            title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)

        duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))

        images = []
        for image in media_meta.get('images', {}).values():
            images.extend(image.values())
        if 'image' in media_meta:
            images.append(media_meta['image'])

        thumbnails = [{
            'url': image.get('href'),
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in images]

        entries.append({
            'id': video_id,
            'title': title,
            'thumbnails': thumbnails,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        })

    return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
a65402ef
YCH
1336
1337
# Playlist extractor for bbc.co.uk/programmes/articles/<id> pages: every clip
# referenced by the article becomes a url entry of the resulting playlist.
class BBCCoUkArticleIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of the clips embedded in the article at *url*."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # The description is optional metadata: strip_or_none passes a
        # missing value through, whereas calling .strip() on the raw lookup
        # result would raise AttributeError if that lookup yields None.
        description = strip_or_none(self._og_search_description(webpage))

        # each clip is announced by a <div typeof="Clip" resource="<url>">
        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
1366
1367
# Shared logic for paginated bbc.co.uk HTML listings.  The code below reads
# `_VIDEO_ID_TEMPLATE` (regex template taking the id regex), `_URL_TEMPLATE`
# (url template taking a video id) and calls
# `_extract_title_and_description(webpage)`, so subclasses have to provide them.
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    def _entries(self, webpage, url, playlist_id):
        """Yield a url result for every video id found in the listing.

        When *url* carries an explicit ``page`` query parameter only that
        page is processed; otherwise the "next" pagination link is followed
        until there is none left.
        """
        single_page = 'page' in compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query)
        # the first page is already downloaded, so numbering of further
        # downloads starts at 2
        for page_num in itertools.count(2):
            for video_id in re.findall(
                    self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
            if single_page:
                return
            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page:
                break
            # Only the progress note is passed: the page number used to be
            # given as a further positional argument, which is the error-note
            # slot and made failure messages start with a bare integer.
            webpage = self._download_webpage(
                compat_urlparse.urljoin(url, next_page), playlist_id,
                note='Downloading page %d' % page_num)

    def _real_extract(self, url):
        """Return the listing at *url* as a lazily evaluated playlist."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title, description = self._extract_title_and_description(webpage)

        return self.playlist_result(
            self._entries(webpage, url, playlist_id),
            playlist_id, title, description)
ded7511a
S
1398
1399
# Common machinery for the API-backed iPlayer listings.  The methods below
# rely on hooks supplied by subclasses: `_PAGE_SIZE`, `_DESCRIPTION_KEY`,
# `_call_api`, `_get_elements`, `_get_episode`, `_get_episode_image`,
# `_get_episode_field`, `_get_playlist_data` and `_get_playlist_title`.
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Return ``episode[key][default_key]`` via try_get."""
        def nested_lookup(container):
            return container[key][default_key]

        return try_get(episode, nested_lookup)

    def _get_description(self, data):
        """Pick the most detailed synopsis stored under `_DESCRIPTION_KEY`."""
        synopses = data.get(self._DESCRIPTION_KEY) or {}
        return dict_get(synopses, ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield url entries for the zero-based *page* of the listing."""
        # the API counts pages from 1
        api_data = self._call_api(programme_id, per_page, page + 1, series_id)
        for element in self._get_elements(api_data):
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                continue
            image = self._get_episode_image(episode)
            thumbnail = image.replace('{recipe}', 'raw') if image else None
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': thumbnail,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the listing as a playlist.

        With an explicit ``page`` query parameter only that page is
        returned (36 items per page); otherwise all pages are exposed
        through an on-demand paged list.
        """
        pid = self._match_id(url)
        query = parse_qs(url)
        series_id = query.get('seriesId', [None])[0]
        page = query.get('page', [None])[0]
        if page:
            fetch_page = functools.partial(self._fetch_page, pid, 36, series_id)
            entries = fetch_page(int(page) - 1)
        else:
            fetch_page = functools.partial(
                self._fetch_page, pid, self._PAGE_SIZE, series_id)
            entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        # a one-item request is enough to obtain the playlist level metadata
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        playlist_title = self._get_playlist_title(playlist_data)
        playlist_description = self._get_description(playlist_data)
        return self.playlist_result(
            entries, pid, playlist_title, playlist_description)
1448
1449
# iPlayer "episodes" listings, served by the graph.ibl.api.bbc.co.uk endpoint.
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        # image URLs sit under the 'default' key of the 'image' mapping
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        # textual fields are nested the same way as images
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        entities = data['entities']
        return entities['results']

    @staticmethod
    def _get_episode(element):
        # each result wraps the actual episode object
        episode = element.get('episode')
        return episode if episode else {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST the listing query and return the ``data.programme`` object."""
        query_variables = {'id': pid, 'page': page, 'perPage': per_page}
        if series_id:
            # restrict the listing to a single series
            query_variables['sliceId'] = series_id
        request_body = json.dumps({
            'id': '5692d93d5aac8d796a0305e895e61551',
            'variables': query_variables,
        }).encode('utf-8')
        response = self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid,
            headers={'Content-Type': 'application/json'},
            data=request_body)
        return response['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        # the programme object itself carries the playlist metadata
        return data

    def _get_playlist_title(self, data):
        return self._get_default(data, 'title')
1537
1538
# iPlayer "group" listings, served by the ibl.api.bbc.co.uk groups endpoint.
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        # the 'standard' rendition of the 'images' mapping is used
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        # fields are plain top-level values for this API
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        return data['elements']

    @staticmethod
    def _get_episode(element):
        # elements are the episodes themselves
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of the group and return its ``group_episodes`` object.

        *series_id* is accepted for signature compatibility and not used.
        """
        api_url = 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid
        paging = {
            'page': page,
            'per_page': per_page,
        }
        response = self._download_json(api_url, pid, query=paging)
        return response['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        return data['group']

    def _get_playlist_title(self, data):
        return data.get('title')
ded7511a
S
1601
1602
# Episode/broadcast/clip listings of a bbc.co.uk programme page.
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return the Open Graph ``(title, description)`` pair of the page.

        The title lookup is explicitly non-fatal.
        """
        return (
            self._og_search_title(webpage, fatal=False),
            self._og_search_description(webpage),
        )