]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bbc.py
[extractor] Use classmethod/property where possible
[yt-dlp.git] / yt_dlp / extractor / bbc.py
CommitLineData
f9934b96 1import xml.etree.ElementTree
1418a043 2import functools
254e64a2 3import itertools
1418a043 4import json
f0228f56 5import re
082c6c86 6
f13b1e7d 7from .common import InfoExtractor
3721515b 8from ..compat import (
3721515b 9 compat_HTTPError,
1bdae7d3 10 compat_str,
50e93e03 11 compat_urllib_error,
3721515b 12 compat_urlparse,
13)
8683b4d8 14from ..utils import (
3721515b 15 ExtractorError,
1418a043 16 OnDemandPagedList,
97067db2 17 clean_html,
9fb64c04 18 dict_get,
9afa1770 19 float_or_none,
97067db2 20 get_element_by_class,
8683b4d8 21 int_or_none,
6d155707 22 js_to_json,
9afa1770
S
23 parse_duration,
24 parse_iso8601,
4dfbf869 25 parse_qs,
1bdae7d3 26 strip_or_none,
9fb64c04 27 try_get,
dab062fb 28 unescapeHTML,
1bdae7d3 29 unified_timestamp,
f0228f56 30 url_or_none,
97067db2
S
31 urlencode_postdata,
32 urljoin,
8683b4d8 33)
082c6c86 34
d12a1a47 35
f13b1e7d 36class BBCCoUkIE(InfoExtractor):
082c6c86 37 IE_NAME = 'bbc.co.uk'
2e3fd9ec 38 IE_DESC = 'BBC iPlayer'
50e93e03 39 _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
f20a11ed
S
40 _VALID_URL = r'''(?x)
41 https?://
42 (?:www\.)?bbc\.co\.uk/
43 (?:
44 programmes/(?!articles/)|
45 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
72d256c4 46 music/(?:clips|audiovideo/popular)[/#]|
d3d45e0a 47 radio/player/|
b72305f0 48 sounds/play/|
d3d45e0a 49 events/[^/]+/play/[^/]+/
f20a11ed 50 )
ded7511a 51 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
f20a11ed 52 ''' % _ID_REGEX
082c6c86 53
97067db2
S
54 _LOGIN_URL = 'https://account.bbc.com/signin'
55 _NETRC_MACHINE = 'bbc'
56
29f7c58a 57 _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
58 _MEDIA_SETS = [
26ccc68b
S
59 # Provides HQ HLS streams with even better quality than the pc mediaset, but fails
60 # with geolocation in some cases even when it's not geo restricted at all (e.g.
d781e293 61 # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
29f7c58a 62 'iptv-all',
63 'pc',
d12a1a47 64 ]
a8b081a0 65
e6174ee9
S
66 _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
67
2e3fd9ec
S
68 _TESTS = [
69 {
f2d0fc68 70 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
2e3fd9ec 71 'info_dict': {
f2d0fc68 72 'id': 'b039d07m',
b1ea6802 73 'ext': 'flv',
acc86c9a 74 'title': 'Kaleidoscope, Leonard Cohen',
c4914185 75 'description': 'The Canadian poet and songwriter reflects on his musical career.',
2e3fd9ec
S
76 },
77 'params': {
b1ea6802 78 # rtmp download
2e3fd9ec
S
79 'skip_download': True,
80 }
082c6c86 81 },
2e3fd9ec
S
82 {
83 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
84 'info_dict': {
85 'id': 'b00yng1d',
86 'ext': 'flv',
87 'title': 'The Man in Black: Series 3: The Printed Name',
88 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
89 'duration': 1800,
90 },
91 'params': {
92 # rtmp download
93 'skip_download': True,
c7f0177f
S
94 },
95 'skip': 'Episode is no longer available on BBC iPlayer Radio',
2e3fd9ec
S
96 },
97 {
98 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
99 'info_dict': {
100 'id': 'b00yng1d',
101 'ext': 'flv',
17968e44 102 'title': 'The Voice UK: Series 3: Blind Auditions 5',
611c1dd9 103 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
17968e44 104 'duration': 5100,
2e3fd9ec
S
105 },
106 'params': {
107 # rtmp download
108 'skip_download': True,
109 },
b1ea6802 110 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
c056efa2
S
111 },
112 {
113 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
114 'info_dict': {
115 'id': 'b03k3pb7',
116 'ext': 'flv',
117 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
118 'description': '2. Invasion',
119 'duration': 3600,
120 },
121 'params': {
122 # rtmp download
123 'skip_download': True,
124 },
b1ea6802 125 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
ae6986fb
S
126 }, {
127 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
128 'info_dict': {
129 'id': 'b04v209v',
130 'ext': 'flv',
131 'title': 'Pete Tong, The Essential New Tune Special',
132 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
133 'duration': 10800,
134 },
135 'params': {
136 # rtmp download
137 'skip_download': True,
a3ef0e1c
YCH
138 },
139 'skip': 'Episode is no longer available on BBC iPlayer Radio',
c7e67594 140 }, {
5aa535c3 141 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
c7e67594
S
142 'note': 'Audio',
143 'info_dict': {
5aa535c3 144 'id': 'p022h44j',
b1ea6802 145 'ext': 'flv',
5aa535c3
S
146 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
147 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
148 'duration': 227,
c7e67594
S
149 },
150 'params': {
b1ea6802 151 # rtmp download
c7e67594
S
152 'skip_download': True,
153 }
154 }, {
155 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
156 'note': 'Video',
157 'info_dict': {
158 'id': 'p025c103',
b1ea6802 159 'ext': 'flv',
c7e67594
S
160 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
161 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
162 'duration': 226,
163 },
164 'params': {
b1ea6802 165 # rtmp download
c7e67594
S
166 'skip_download': True,
167 }
e68ae99a
S
168 }, {
169 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
170 'info_dict': {
171 'id': 'p02n76xf',
172 'ext': 'flv',
173 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
174 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
175 'duration': 3540,
176 },
177 'params': {
178 # rtmp download
179 'skip_download': True,
180 },
b1ea6802 181 'skip': 'geolocation',
25fa8d66
YCH
182 }, {
183 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
184 'info_dict': {
185 'id': 'b05zmgw1',
186 'ext': 'flv',
187 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
188 'title': 'Royal Academy Summer Exhibition',
189 'duration': 3540,
190 },
191 'params': {
192 # rtmp download
193 'skip_download': True,
194 },
b1ea6802 195 'skip': 'geolocation',
54914380
S
196 }, {
197 # iptv-all mediaset fails with geolocation however there is no geo restriction
198 # for this programme at all
5aa535c3 199 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
54914380 200 'info_dict': {
5aa535c3 201 'id': 'b06rkms3',
54914380 202 'ext': 'flv',
5aa535c3
S
203 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
204 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
54914380
S
205 },
206 'params': {
207 # rtmp download
208 'skip_download': True,
209 },
b1ea6802 210 'skip': 'Now it\'s really geo-restricted',
1ac6e794 211 }, {
067aa17e 212 # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
1ac6e794
S
213 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
214 'info_dict': {
215 'id': 'p028bfkj',
b1ea6802 216 'ext': 'flv',
1ac6e794
S
217 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
218 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
219 },
220 'params': {
b1ea6802 221 # rtmp download
1ac6e794
S
222 'skip_download': True,
223 },
b72305f0
J
224 }, {
225 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
226 'note': 'Audio',
227 'info_dict': {
228 'id': 'm0007jz9',
229 'ext': 'mp4',
230 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
231 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
232 'duration': 9840,
233 },
234 'params': {
235 # rtmp download
236 'skip_download': True,
237 }
31763975
S
238 }, {
239 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
240 'only_matching': True,
c7e67594
S
241 }, {
242 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
243 'only_matching': True,
0692ef86
S
244 }, {
245 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
246 'only_matching': True,
f20a11ed
S
247 }, {
248 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
249 'only_matching': True,
72d256c4
S
250 }, {
251 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
252 'only_matching': True,
53647dfd
S
253 }, {
254 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
255 'only_matching': True,
6f356cbb
S
256 }, {
257 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
258 'only_matching': True,
259 }, {
260 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
261 'only_matching': True,
72d256c4 262 }]
2e3fd9ec 263
def _perform_login(self, username, password):
    """Sign in to the BBC account service with the given credentials.

    Downloads the sign-in page, posts its hidden form fields together with
    the username and password, and raises ExtractorError when the final URL
    of the response still contains the sign-in URL.
    """
    signin_page = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading signin page')

    # Start from the hidden inputs of the sign-in form, then add credentials.
    form_data = self._hidden_inputs(signin_page)
    form_data['username'] = username
    form_data['password'] = password

    # The form action may be relative; resolve it against the sign-in URL.
    form_action = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
        'post url', default=self._LOGIN_URL, group='url')
    post_url = urljoin(self._LOGIN_URL, form_action)

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(form_data),
        headers={'Referer': self._LOGIN_URL})

    # Ending up anywhere other than the sign-in URL is treated as success.
    if self._LOGIN_URL not in urlh.geturl():
        return

    error = clean_html(get_element_by_class('form-message', response))
    if error:
        raise ExtractorError(
            'Unable to login: %s' % error, expected=True)
    raise ExtractorError('Unable to log in')
289
d12a1a47
S
class MediaSelectionError(Exception):
    """Raised when a media selector response carries an error result."""

    def __init__(self, id):
        # `id` is the error code string taken from the response's 'result'
        # field (e.g. 'geolocation'); callers read it back via `.id`.
        self.id = id
293
2e3fd9ec
S
294 def _extract_asx_playlist(self, connection, programme_id):
295 asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
296 return [ref.get('href') for ref in asx.findall('./Entry/ref')]
297
2e3fd9ec 298 def _extract_items(self, playlist):
e6174ee9
S
299 return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
300
2e3fd9ec 301 def _extract_medias(self, media_selection):
29f7c58a 302 error = media_selection.get('result')
303 if error:
304 raise BBCCoUkIE.MediaSelectionError(error)
305 return media_selection.get('media') or []
2e3fd9ec
S
306
307 def _extract_connections(self, media):
29f7c58a 308 return media.get('connection') or []
2e3fd9ec 309
f13b1e7d 310 def _get_subtitles(self, media, programme_id):
2e3fd9ec
S
311 subtitles = {}
312 for connection in self._extract_connections(media):
f0228f56
S
313 cc_url = url_or_none(connection.get('href'))
314 if not cc_url:
315 continue
316 captions = self._download_xml(
317 cc_url, programme_id, 'Downloading captions', fatal=False)
f9934b96 318 if not isinstance(captions, xml.etree.ElementTree.Element):
f0228f56 319 continue
29f7c58a 320 subtitles['en'] = [
f13b1e7d
JMF
321 {
322 'url': connection.get('href'),
323 'ext': 'ttml',
324 },
f13b1e7d 325 ]
29f7c58a 326 break
2e3fd9ec 327 return subtitles
082c6c86 328
d12a1a47
S
def _raise_extractor_error(self, media_selection_error):
    """Re-raise a media selection error as an expected ExtractorError."""
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    raise ExtractorError(message, expected=True)
333
c056efa2 334 def _download_media_selector(self, programme_id):
d12a1a47 335 last_exception = None
29f7c58a 336 for media_set in self._MEDIA_SETS:
d12a1a47
S
337 try:
338 return self._download_media_selector_url(
29f7c58a 339 self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
d12a1a47 340 except BBCCoUkIE.MediaSelectionError as e:
d781e293 341 if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
d12a1a47
S
342 last_exception = e
343 continue
344 self._raise_extractor_error(e)
345 self._raise_extractor_error(last_exception)
9afa1770
S
346
347 def _download_media_selector_url(self, url, programme_id=None):
29f7c58a 348 media_selection = self._download_json(
349 url, programme_id, 'Downloading media selection JSON',
9283d4ea 350 expected_status=(403, 404))
9afa1770 351 return self._process_media_selector(media_selection, programme_id)
082c6c86 352
def _process_media_selector(self, media_selection, programme_id):
    """Turn a media selection response into (formats, subtitles).

    'video' and 'audio' media contribute formats, one or more per
    connection; connections whose href was already seen are skipped.
    'captions' media populate subtitles. subtitles stays None when the
    response has no captions media.
    """
    formats = []
    subtitles = None
    seen_hrefs = []

    for media in self._extract_medias(media_selection):
        kind = media.get('kind')
        if kind == 'captions':
            subtitles = self.extract_subtitles(media, programme_id)
            continue
        if kind not in ('video', 'audio'):
            continue

        bitrate = int_or_none(media.get('bitrate'))
        encoding = media.get('encoding')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        file_size = int_or_none(media.get('media_file_size'))

        for connection in self._extract_connections(media):
            href = connection.get('href')
            if href in seen_hrefs:
                continue
            if href:
                seen_hrefs.append(href)

            conn_kind = connection.get('kind')
            protocol = connection.get('protocol')
            supplier = connection.get('supplier')
            transfer_format = connection.get('transferFormat')
            format_id = supplier or conn_kind or protocol

            # ASX playlist
            if supplier == 'asx':
                asx_refs = self._extract_asx_playlist(connection, programme_id)
                formats.extend({
                    'url': ref,
                    'format_id': 'ref%s_%s' % (i, format_id),
                } for i, ref in enumerate(asx_refs))
                continue

            if transfer_format == 'dash':
                formats.extend(self._extract_mpd_formats(
                    href, programme_id, mpd_id=format_id, fatal=False))
                continue

            if transfer_format == 'hls':
                # TODO: let expected_status be passed into _extract_xxx_formats() instead
                try:
                    hls_formats = self._extract_m3u8_formats(
                        href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False)
                except ExtractorError as e:
                    cause = e.exc_info[1]
                    denied = (isinstance(cause, compat_urllib_error.HTTPError)
                              and cause.code in (403, 404))
                    if not denied:
                        raise
                    # A 403/404 on the manifest just means no HLS formats.
                    hls_formats = []
                formats.extend(hls_formats)
                continue

            if transfer_format == 'hds':
                formats.extend(self._extract_f4m_formats(
                    href, programme_id, f4m_id=format_id, fatal=False))
                continue

            # Plain (non-manifest) connection: build a single format entry.
            if not supplier and bitrate:
                format_id += '-%d' % bitrate
            fmt = {
                'format_id': format_id,
                'filesize': file_size,
            }
            if kind == 'video':
                fmt['width'] = width
                fmt['height'] = height
                fmt['tbr'] = bitrate
                fmt['vcodec'] = encoding
            else:
                fmt['abr'] = bitrate
                fmt['acodec'] = encoding
                fmt['vcodec'] = 'none'

            if protocol in ('http', 'https'):
                # Direct link
                fmt['url'] = href
            elif protocol == 'rtmp':
                application = connection.get('application', 'ondemand')
                auth_string = connection.get('authString')
                identifier = connection.get('identifier')
                server = connection.get('server')
                fmt.update({
                    'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
                    'play_path': identifier,
                    'app': '%s?%s' % (application, auth_string),
                    'page_url': 'http://www.bbc.co.uk',
                    'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
                    'rtmp_live': False,
                    'ext': 'flv',
                })
            else:
                # Unsupported protocol: drop this connection.
                continue
            formats.append(fmt)

    return formats, subtitles
2e3fd9ec 447
ae6986fb
S
448 def _download_playlist(self, playlist_id):
449 try:
450 playlist = self._download_json(
451 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
452 playlist_id, 'Downloading playlist JSON')
c45b8741 453 formats = []
454 subtitles = {}
ae6986fb 455
c45b8741 456 for version in playlist.get('allAvailableVersions', []):
ae6986fb
S
457 smp_config = version['smpConfig']
458 title = smp_config['title']
459 description = smp_config['summary']
460 for item in smp_config['items']:
461 kind = item['kind']
40fcba5e 462 if kind not in ('programme', 'radioProgramme'):
ae6986fb
S
463 continue
464 programme_id = item.get('vpid')
d97f5cd7 465 duration = int_or_none(item.get('duration'))
c45b8741 466 version_formats, version_subtitles = self._download_media_selector(programme_id)
467 types = version['types']
468 for f in version_formats:
469 f['format_note'] = ', '.join(types)
470 if any('AudioDescribed' in x for x in types):
471 f['language_preference'] = -10
472 formats += version_formats
473 for tag, subformats in (version_subtitles or {}).items():
f304da8a 474 subtitles.setdefault(tag, []).extend(subformats)
c45b8741 475
476 return programme_id, title, description, duration, formats, subtitles
ae6986fb 477 except ExtractorError as ee:
f813928e 478 if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
ae6986fb
S
479 raise
480
481 # fallback to legacy playlist
9afa1770
S
482 return self._process_legacy_playlist(playlist_id)
483
484 def _process_legacy_playlist_url(self, url, display_id):
485 playlist = self._download_legacy_playlist_url(url, display_id)
486 return self._extract_from_legacy_playlist(playlist, display_id)
487
488 def _process_legacy_playlist(self, playlist_id):
489 return self._process_legacy_playlist_url(
490 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
491
492 def _download_legacy_playlist_url(self, url, playlist_id=None):
493 return self._download_xml(
494 url, playlist_id, 'Downloading legacy playlist XML')
ae6986fb 495
def _extract_from_legacy_playlist(self, playlist, playlist_id):
    """Extract programme data from a legacy EMP playlist XML element.

    Returns a tuple (programme_id, title, description, duration, formats,
    subtitles) built from the last 'programme'/'radioProgramme' item.

    Raises ExtractorError (expected) when the playlist has a <noItems>
    element, or when it contains no programme item at all (the latter used
    to surface as UnboundLocalError).

    The programme id is taken from the item's own 'identifier'/'group'
    attributes first and from its <mediator> child otherwise. Previously the
    result of the item-level lookup was computed but discarded, so only the
    mediator was ever consulted.
    """
    ns = self._EMP_PLAYLIST_NS

    no_items = playlist.find('./{%s}noItems' % ns)
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % playlist_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % playlist_id
        elif reason == 'noMedia':
            msg = 'Episode %s is not currently available' % playlist_id
        else:
            msg = 'Episode %s is not available: %s' % (playlist_id, reason)
        raise ExtractorError(msg, expected=True)

    def get_from_attributes(element):
        # First attribute whose value has the shape of a programme id.
        for attr in ('identifier', 'group'):
            value = element.get(attr)
            if value and re.match(r'^[pb][\da-z]{7}$', value):
                return value
        return None

    def get_programme_id(item):
        own_id = get_from_attributes(item)
        if own_id:
            return own_id
        mediator = item.find('./{%s}mediator' % ns)
        if mediator is not None:
            return get_from_attributes(mediator)
        return None

    result = None
    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind not in ('programme', 'radioProgramme'):
            continue
        title = playlist.find('./{%s}title' % ns).text
        description_el = playlist.find('./{%s}summary' % ns)
        description = description_el.text if description_el is not None else None

        programme_id = get_programme_id(item)
        duration = int_or_none(item.get('duration'))

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
        else:
            # No usable id: treat the item itself as the media selection.
            formats, subtitles = self._process_media_selector(item, playlist_id)
            programme_id = playlist_id

        result = (programme_id, title, description, duration, formats, subtitles)

    if result is None:
        raise ExtractorError(
            'No programme items found in playlist %s' % playlist_id, expected=True)
    return result
539
c056efa2
S
def _real_extract(self, url):
    """Extract a single BBC programme from its page URL.

    The programme (version) id is looked up in the embedded player
    configuration, then in a "vpid" JSON field of the page; when neither is
    present the playlist for the id matched from the URL is used instead.
    """
    group_id = self._match_id(url)
    webpage = self._download_webpage(url, group_id, 'Downloading video page')

    # The page itself may carry a player error message; report it as-is.
    page_error = self._search_regex(
        r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
        webpage, 'error', default=None)
    if page_error:
        raise ExtractorError(page_error, expected=True)

    programme_id = duration = None

    tviplayer = self._search_regex(
        r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
        webpage, 'player', default=None)
    if tviplayer:
        player = self._parse_json(tviplayer, group_id).get('player', {})
        duration = int_or_none(player.get('duration'))
        programme_id = player.get('vpid')

    if not programme_id:
        vpid_pattern = r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX
        programme_id = self._search_regex(
            vpid_pattern, webpage, 'vpid', fatal=False, default=None)

    if not programme_id:
        programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
    else:
        formats, subtitles = self._download_media_selector(programme_id)

        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
                 r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')

        description = self._search_regex(
            (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
             r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
            webpage, 'description', default=None) or self._html_search_meta('description', webpage)

    self._sort_formats(formats)

    thumbnail = self._og_search_thumbnail(webpage, default=None)
    return {
        'id': programme_id,
        'title': title,
        'description': description,
        'thumbnail': thumbnail,
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }
10273d6e 592
593
9afa1770
S
594class BBCIE(BBCCoUkIE):
595 IE_NAME = 'bbc'
596 IE_DESC = 'BBC'
597 _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
10273d6e 598
29f7c58a 599 _MEDIA_SETS = [
29f7c58a 600 'pc',
2d997542 601 'mobile-tablet-main',
d12a1a47 602 ]
10273d6e 603
604 _TESTS = [{
6a747190 605 # article with multiple videos embedded with data-playable containing vpids
10273d6e 606 'url': 'http://www.bbc.com/news/world-europe-32668511',
607 'info_dict': {
608 'id': 'world-europe-32668511',
acc86c9a 609 'title': 'Russia stages massive WW2 parade',
9afa1770 610 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
10273d6e 611 },
612 'playlist_count': 2,
a3bfddfa 613 }, {
6a747190 614 # article with multiple videos embedded with data-playable (more videos)
10273d6e 615 'url': 'http://www.bbc.com/news/business-28299555',
616 'info_dict': {
617 'id': 'business-28299555',
618 'title': 'Farnborough Airshow: Video highlights',
9afa1770 619 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
10273d6e 620 },
621 'playlist_count': 9,
9afa1770 622 'skip': 'Save time',
88ed52ae
S
623 }, {
624 # article with multiple videos embedded with `new SMP()`
6a747190 625 # broken
88ed52ae
S
626 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
627 'info_dict': {
628 'id': '3662a707-0af9-3149-963f-47bea720b460',
b7d7674f 629 'title': 'BUGGER',
88ed52ae
S
630 },
631 'playlist_count': 18,
a3bfddfa 632 }, {
6a747190 633 # single video embedded with data-playable containing vpid
10273d6e 634 'url': 'http://www.bbc.com/news/world-europe-32041533',
10273d6e 635 'info_dict': {
636 'id': 'p02mprgb',
55ebae26 637 'ext': 'mp4',
10273d6e 638 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
55ebae26 639 'description': 'md5:2868290467291b37feda7863f7a83f54',
10273d6e 640 'duration': 47,
9afa1770 641 'timestamp': 1427219242,
da92eeae 642 'upload_date': '20150324',
10273d6e 643 },
644 'params': {
9afa1770 645 # rtmp download
10273d6e 646 'skip_download': True,
647 }
a3bfddfa 648 }, {
6a747190
S
649 # article with single video embedded with data-playable containing XML playlist
650 # with direct video links as progressiveDownloadUrl (for now these are extracted)
651 # and playlist with f4m and m3u8 as streamingUrl
de939d89 652 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
de939d89 653 'info_dict': {
9afa1770 654 'id': '150615_telabyad_kentin_cogu',
de939d89 655 'ext': 'mp4',
ad152e2d 656 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
05087d1b 657 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
9afa1770 658 'timestamp': 1434397334,
da92eeae 659 'upload_date': '20150615',
de939d89 660 },
661 'params': {
662 'skip_download': True,
663 }
c936d8cc 664 }, {
6a747190 665 # single video embedded with data-playable containing XML playlists (regional section)
de939d89 666 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 667 'info_dict': {
9afa1770 668 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 669 'ext': 'mp4',
9afa1770 670 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
05087d1b 671 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
9afa1770 672 'timestamp': 1434713142,
da92eeae 673 'upload_date': '20150619',
de939d89 674 },
675 'params': {
676 'skip_download': True,
677 }
a346b1ff
S
678 }, {
679 # single video from video playlist embedded with vxp-playlist-data JSON
680 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
681 'info_dict': {
682 'id': 'p02w6qjc',
55ebae26 683 'ext': 'mp4',
a346b1ff
S
684 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
685 'duration': 56,
0bc4ee60 686 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
a346b1ff
S
687 },
688 'params': {
689 'skip_download': True,
690 }
9afa1770
S
691 }, {
692 # single video story with digitalData
693 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
694 'info_dict': {
695 'id': 'p02q6gc4',
696 'ext': 'flv',
697 'title': 'Sri Lanka’s spicy secret',
698 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
699 'timestamp': 1437674293,
700 'upload_date': '20150723',
701 },
702 'params': {
703 # rtmp download
704 'skip_download': True,
705 }
706 }, {
707 # single video story without digitalData
708 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
709 'info_dict': {
710 'id': 'p018zqqg',
55ebae26 711 'ext': 'mp4',
9afa1770
S
712 'title': 'Hyundai Santa Fe Sport: Rock star',
713 'description': 'md5:b042a26142c4154a6e472933cf20793d',
ae8bdfd1
S
714 'timestamp': 1415867444,
715 'upload_date': '20141113',
9afa1770
S
716 },
717 'params': {
718 # rtmp download
719 'skip_download': True,
720 }
9fb64c04
S
721 }, {
722 # single video embedded with Morph
723 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
724 'info_dict': {
725 'id': 'p041vhd0',
726 'ext': 'mp4',
727 'title': "Nigeria v Japan - Men's First Round",
728 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
729 'duration': 7980,
730 'uploader': 'BBC Sport',
731 'uploader_id': 'bbc_sport',
732 },
733 'params': {
734 # m3u8 download
735 'skip_download': True,
9fb64c04
S
736 },
737 'skip': 'Georestricted to UK',
9afa1770 738 }, {
6a747190 739 # single video with playlist.sxml URL in playlist param
9afa1770
S
740 'url': 'http://www.bbc.com/sport/0/football/33653409',
741 'info_dict': {
742 'id': 'p02xycnp',
55ebae26 743 'ext': 'mp4',
9afa1770 744 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
7033bc1a 745 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
9afa1770
S
746 'duration': 140,
747 },
748 'params': {
749 # rtmp download
750 'skip_download': True,
751 }
b5d48cb1 752 }, {
6a747190 753 # article with multiple videos embedded with playlist.sxml in playlist param
b5d48cb1
S
754 'url': 'http://www.bbc.com/sport/0/football/34475836',
755 'info_dict': {
756 'id': '34475836',
450b233c 757 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
8c65e4a5 758 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
b5d48cb1
S
759 },
760 'playlist_count': 3,
450b233c
S
761 }, {
762 # school report article with single video
763 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
764 'info_dict': {
765 'id': '35744779',
766 'title': 'School which breaks down barriers in Jerusalem',
767 },
768 'playlist_count': 1,
9afa1770
S
769 }, {
770 # single video with playlist URL from weather section
771 'url': 'http://www.bbc.com/weather/features/33601775',
772 'only_matching': True,
773 }, {
774 # custom redirection to www.bbc.com
1bdae7d3 775 # also, video with window.__INITIAL_DATA__
9afa1770 776 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
1bdae7d3 777 'info_dict': {
778 'id': 'p02xzws1',
779 'ext': 'mp4',
780 'title': "Pluto may have 'nitrogen glaciers'",
781 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
782 'thumbnail': r're:https?://.+/.+\.jpg',
783 'timestamp': 1437785037,
784 'upload_date': '20150725',
785 },
50e93e03 786 }, {
787 # video with window.__INITIAL_DATA__ and value as JSON string
788 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
789 'info_dict': {
790 'id': 'p0b71qth',
791 'ext': 'mp4',
792 'title': 'Why France is making this woman a national hero',
793 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
794 'thumbnail': r're:https?://.+/.+\.jpg',
795 'timestamp': 1638230731,
796 'upload_date': '20211130',
797 },
a1cf3e38
S
798 }, {
799 # single video article embedded with data-media-vpid
800 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
801 'only_matching': True,
6d155707 802 }, {
50e93e03 803 # bbcthreeConfig
6d155707
S
804 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
805 'info_dict': {
806 'id': 'p06556y7',
807 'ext': 'mp4',
50e93e03 808 'title': 'Things Not To Say to people that live on council estates',
809 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
810 'duration': 360,
811 'thumbnail': r're:https?://.+/.+\.jpg',
6d155707 812 },
b96b4be4
RA
813 }, {
814 # window.__PRELOADED_STATE__
815 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
816 'info_dict': {
817 'id': 'b0b9z4vz',
818 'ext': 'mp4',
819 'title': 'Prom 6: An American in Paris and Turangalila',
820 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
821 'uploader': 'Radio 3',
822 'uploader_id': 'bbc_radio_three',
823 },
373941c5
S
824 }, {
825 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
826 'info_dict': {
827 'id': 'p06w9tws',
828 'ext': 'mp4',
829 'title': 'md5:2fabf12a726603193a2879a055f72514',
830 'description': 'Learn English words and phrases from this story',
831 },
832 'add_ie': [BBCCoUkIE.ie_key()],
3721515b 833 }, {
834 # BBC Reel
835 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
836 'info_dict': {
837 'id': 'p07c6sb9',
838 'ext': 'mp4',
839 'title': 'How positive thinking is harming your happiness',
840 'alt_title': 'The downsides of positive thinking',
841 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
842 'duration': 235,
843 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
844 'upload_date': '20190604',
845 'categories': ['Psychology'],
846 },
10273d6e 847 }]
848
9afa1770
S
@classmethod
def suitable(cls, url):
    """Tell whether this generic BBC extractor should handle *url*.

    Returns False as soon as one of the more specific BBC extractors
    listed below accepts the URL; otherwise defers to the parent
    class's ``suitable`` check.
    """
    more_specific = (
        BBCCoUkIE,
        BBCCoUkArticleIE,
        BBCCoUkIPlayerEpisodesIE,
        BBCCoUkIPlayerGroupIE,
        BBCCoUkPlaylistIE,
    )
    for extractor in more_specific:
        if extractor.suitable(url):
            return False
    return super(BBCIE, cls).suitable(url)
9afa1770
S
854
855 def _extract_from_media_meta(self, media_meta, video_id):
856 # Direct links to media in media metadata (e.g.
857 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
858 # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
859 source_files = media_meta.get('sourceFiles')
860 if source_files:
861 return [{
862 'url': f['url'],
863 'format_id': format_id,
864 'ext': f.get('encoding'),
865 'tbr': float_or_none(f.get('bitrate'), 1000),
866 'filesize': int_or_none(f.get('filesize')),
867 } for format_id, f in source_files.items() if f.get('url')], []
868
869 programme_id = media_meta.get('externalId')
870 if programme_id:
871 return self._download_media_selector(programme_id)
872
873 # Process playlist.sxml as legacy playlist
874 href = media_meta.get('href')
875 if href:
876 playlist = self._download_legacy_playlist_url(href)
877 _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
878 return formats, subtitles
879
880 return [], []
881
baf39a1a
S
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
    """Build an info dict for a single legacy ``playlist.sxml`` URL.

    The legacy playlist helper supplies id, title, description, duration,
    formats and subtitles; *timestamp* is passed through unchanged.
    """
    legacy_info = self._process_legacy_playlist_url(url, playlist_id)
    programme_id, title, description, duration, formats, subtitles = legacy_info
    self._sort_formats(formats)
    info = {
        'id': programme_id,
        'title': title,
        'description': description,
        'duration': duration,
        'timestamp': timestamp,
        'formats': formats,
        'subtitles': subtitles,
    }
    return info
895
def _real_extract(self, url):
    """Extract a single video or a playlist from a generic BBC page.

    The page is downloaded once and then probed with a sequence of embed
    strategies; the first strategy that finds media returns immediately.
    In order: ``playlist.sxml`` / ``data-playable`` embeds, a ``data-pid``
    video group, a single vpid, the BBC Reel ``initial-data`` script, a
    Morph payload, ``window.__PRELOADED_STATE__``, ``bbcthreeConfig``,
    ``window.__INITIAL_DATA__``, SMP/``setPlaylist`` embed URLs and finally
    ``data-media-meta`` / ``mediaAssetPage`` / ``vxp-playlist-data``.

    Returns an info dict for a single video, or a playlist result.
    """
    playlist_id = self._match_id(url)

    webpage = self._download_webpage(url, playlist_id)

    json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
    timestamp = json_ld_info.get('timestamp')

    playlist_title = json_ld_info.get('title')
    if not playlist_title:
        playlist_title = (self._og_search_title(webpage, default=None)
                          or self._html_extract_title(webpage, 'playlist title', default=None))
        if playlist_title:
            # Drop the trailing " - BBC ..." site suffix
            playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()

    playlist_description = json_ld_info.get(
        'description') or self._og_search_description(webpage, default=None)

    if not timestamp:
        timestamp = parse_iso8601(self._search_regex(
            [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
             r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
             r'"datePublished":\s*"([^"]+)'],
            webpage, 'date', default=None))

    entries = []

    # article with multiple videos embedded with playlist.sxml (e.g.
    # http://www.bbc.com/sport/0/football/34475836)
    playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
    playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
    if playlists:
        entries = [
            self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
            for playlist_url in playlists]

    # news article with multiple videos embedded with data-playable
    data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
    if data_playables:
        for _, data_playable_json in data_playables:
            data_playable = self._parse_json(
                unescapeHTML(data_playable_json), playlist_id, fatal=False)
            if not data_playable:
                continue
            settings = data_playable.get('settings', {})
            if settings:
                # data-playable with video vpid in settings.playlistObject.items (e.g.
                # http://www.bbc.com/news/world-us-canada-34473351)
                playlist_object = settings.get('playlistObject', {})
                if playlist_object:
                    items = playlist_object.get('items')
                    if items and isinstance(items, list):
                        title = playlist_object['title']
                        description = playlist_object.get('summary')
                        duration = int_or_none(items[0].get('duration'))
                        programme_id = items[0].get('vpid')
                        formats, subtitles = self._download_media_selector(programme_id)
                        self._sort_formats(formats)
                        entries.append({
                            'id': programme_id,
                            'title': title,
                            'description': description,
                            'timestamp': timestamp,
                            'duration': duration,
                            'formats': formats,
                            'subtitles': subtitles,
                        })
                else:
                    # data-playable without vpid but with a playlist.sxml URLs
                    # in otherSettings.playlist (e.g.
                    # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
                    playlist = data_playable.get('otherSettings', {}).get('playlist', {})
                    if playlist:
                        entry = None
                        for key in ('streaming', 'progressiveDownload'):
                            playlist_url = playlist.get('%sUrl' % key)
                            if not playlist_url:
                                continue
                            try:
                                info = self._extract_from_playlist_sxml(
                                    playlist_url, playlist_id, timestamp)
                                if not entry:
                                    entry = info
                                else:
                                    # Merge the second variant into the first entry
                                    entry['title'] = info['title']
                                    entry['formats'].extend(info['formats'])
                            except ExtractorError as e:
                                # Some playlist URL may fail with 500, at the same time
                                # the other one may work fine (e.g.
                                # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
                                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
                                    continue
                                raise
                        if entry:
                            self._sort_formats(entry['formats'])
                            entries.append(entry)

    if entries:
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
    group_id = self._search_regex(
        r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
        webpage, 'group id', default=None)
    if group_id:
        return self.url_result(
            'https://www.bbc.co.uk/programmes/%s' % group_id,
            ie=BBCCoUkIE.ie_key())

    # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
    programme_id = self._search_regex(
        [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
         r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
         r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
        webpage, 'vpid', default=None)

    if programme_id:
        formats, subtitles = self._download_media_selector(programme_id)
        self._sort_formats(formats)
        # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
        # The parse is non-fatal, so fall back to an empty dict when it fails.
        digital_data = self._parse_json(
            self._search_regex(
                r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
            programme_id, fatal=False) or {}
        page_info = digital_data.get('page', {}).get('pageInfo', {})
        title = page_info.get('pageName') or self._og_search_title(webpage)
        description = page_info.get('description') or self._og_search_description(webpage)
        timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        }

    # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
    initial_data = self._parse_json(self._html_search_regex(
        r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
        webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
    if initial_data:
        init_data = try_get(
            initial_data, lambda x: x['initData']['items'][0], dict) or {}
        smp_data = init_data.get('smpData') or {}
        clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
        version_id = clip_data.get('versionID')
        if version_id:
            title = smp_data['title']
            formats, subtitles = self._download_media_selector(version_id)
            self._sort_formats(formats)
            image_url = smp_data.get('holdingImageURL')
            display_date = init_data.get('displayDate')
            topic_title = init_data.get('topicTitle')

            return {
                'id': version_id,
                'title': title,
                'formats': formats,
                'alt_title': init_data.get('shortTitle'),
                'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
                'description': smp_data.get('summary') or init_data.get('shortSummary'),
                'upload_date': display_date.replace('-', '') if display_date else None,
                'subtitles': subtitles,
                'duration': int_or_none(clip_data.get('duration')),
                'categories': [topic_title] if topic_title else None,
            }

    # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
    # There are several setPayload calls may be present but the video
    # seems to be always related to the first one
    morph_payload = self._parse_json(
        self._search_regex(
            r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
            webpage, 'morph payload', default='{}'),
        playlist_id, fatal=False)
    if morph_payload:
        components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
        for component in components:
            if not isinstance(component, dict):
                continue
            lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
            if not lead_media:
                continue
            identifiers = lead_media.get('identifiers')
            if not identifiers or not isinstance(identifiers, dict):
                continue
            programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
            if not programme_id:
                continue
            title = lead_media.get('title') or self._og_search_title(webpage)
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            description = lead_media.get('summary')
            uploader = lead_media.get('masterBrand')
            uploader_id = lead_media.get('mid')
            duration = None
            duration_d = lead_media.get('duration')
            if isinstance(duration_d, dict):
                duration = parse_duration(dict_get(
                    duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'duration': duration,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'formats': formats,
                'subtitles': subtitles,
            }

    preload_state = self._parse_json(self._search_regex(
        r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
        'preload state', default='{}'), playlist_id, fatal=False)
    if preload_state:
        current_programme = preload_state.get('programmes', {}).get('current') or {}
        programme_id = current_programme.get('id')
        if current_programme and programme_id and current_programme.get('type') == 'playable_item':
            # `or {}` also covers keys that are present but null
            titles = current_programme.get('titles') or {}
            title = titles.get('tertiary') or playlist_title
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            synopses = current_programme.get('synopses') or {}
            network = current_programme.get('network') or {}
            duration = int_or_none(
                (current_programme.get('duration') or {}).get('value'))
            thumbnail = None
            image_url = current_programme.get('image_url')
            if image_url:
                thumbnail = image_url.replace('{recipe}', 'raw')
            return {
                'id': programme_id,
                'title': title,
                'description': dict_get(synopses, ('long', 'medium', 'short')),
                'thumbnail': thumbnail,
                'duration': duration,
                'uploader': network.get('short_title'),
                'uploader_id': network.get('id'),
                'formats': formats,
                'subtitles': subtitles,
            }

    bbc3_config = self._parse_json(
        self._search_regex(
            r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
            'bbcthree config', default='{}'),
        playlist_id, transform_source=js_to_json, fatal=False) or {}
    payload = bbc3_config.get('payload') or {}
    if payload:
        clip = payload.get('currentClip') or {}
        clip_vpid = clip.get('vpid')
        clip_title = clip.get('title')
        if clip_vpid and clip_title:
            formats, subtitles = self._download_media_selector(clip_vpid)
            self._sort_formats(formats)
            return {
                'id': clip_vpid,
                'title': clip_title,
                'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
                'description': clip.get('description'),
                'duration': parse_duration(clip.get('duration')),
                'formats': formats,
                'subtitles': subtitles,
            }
        bbc3_playlist = try_get(
            payload, lambda x: x['content']['bbcMedia']['playlist'],
            dict)
        if bbc3_playlist:
            playlist_title = bbc3_playlist.get('title') or playlist_title
            thumbnail = bbc3_playlist.get('holdingImageURL')
            entries = []
            for bbc3_item in bbc3_playlist['items']:
                programme_id = bbc3_item.get('versionID')
                if not programme_id:
                    continue
                formats, subtitles = self._download_media_selector(programme_id)
                self._sort_formats(formats)
                entries.append({
                    'id': programme_id,
                    'title': playlist_title,
                    'thumbnail': thumbnail,
                    'timestamp': timestamp,
                    'formats': formats,
                    'subtitles': subtitles,
                })
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)

    # __INITIAL_DATA__ is either a JSON object or a JSON string that itself
    # contains the JSON object (hence the two-step parse for the quoted form).
    initial_data = self._search_regex(
        r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
        'quoted preload state', default=None)
    if initial_data is None:
        # The default must be a string: it is handed to _parse_json below,
        # and a dict there would raise TypeError instead of being skipped.
        initial_data = self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
            'preload state', default='{}')
    else:
        # A failed non-fatal parse yields None; keep a parseable string instead
        initial_data = self._parse_json(
            initial_data or '"{}"', playlist_id, fatal=False) or '{}'
    initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
    if initial_data:
        def parse_media(media):
            # Appends one entry per playable item of *media* to `entries`
            if not media:
                return
            for item in (try_get(media, lambda x: x['media']['items'], list) or []):
                item_id = item.get('id')
                item_title = item.get('title')
                if not (item_id and item_title):
                    continue
                formats, subtitles = self._download_media_selector(item_id)
                self._sort_formats(formats)
                item_desc = None
                blocks = try_get(media, lambda x: x['summary']['blocks'], list)
                if blocks:
                    summary = []
                    for block in blocks:
                        text = try_get(block, lambda x: x['model']['text'], compat_str)
                        if text:
                            summary.append(text)
                    if summary:
                        item_desc = '\n\n'.join(summary)
                item_time = None
                for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
                    if try_get(meta, lambda x: x['label']) == 'Published':
                        item_time = unified_timestamp(meta.get('timestamp'))
                        break
                entries.append({
                    'id': item_id,
                    'title': item_title,
                    'thumbnail': item.get('holdingImageUrl'),
                    'formats': formats,
                    'subtitles': subtitles,
                    'timestamp': item_time,
                    'description': strip_or_none(item_desc),
                })
        for resp in (initial_data.get('data') or {}).values():
            name = resp.get('name')
            if name == 'media-experience':
                parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
            elif name == 'article':
                for block in (try_get(resp,
                                      (lambda x: x['data']['blocks'],
                                       lambda x: x['data']['content']['model']['blocks'],),
                                      list) or []):
                    if block.get('type') != 'media':
                        continue
                    parse_media(block.get('model'))
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)

    def extract_all(pattern):
        # Parse every regex match as JSON, dropping the ones that fail
        return list(filter(None, map(
            lambda s: self._parse_json(s, playlist_id, fatal=False),
            re.findall(pattern, webpage))))

    # Multiple video article (e.g.
    # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
    EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
    entries = []
    for match in extract_all(r'new\s+SMP\(({.+?})\)'):
        embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
        if embed_url and re.match(EMBED_URL, embed_url):
            entries.append(embed_url)
    entries.extend(re.findall(
        r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
    if entries:
        return self.playlist_result(
            [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
            playlist_id, playlist_title, playlist_description)

    # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
    medias = extract_all(r"data-media-meta='({[^']+})'")

    if not medias:
        # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
        media_asset = self._search_regex(
            r'mediaAssetPage\.init\(\s*({.+?}), "/',
            webpage, 'media asset', default=None)
        if media_asset:
            # Non-fatal parse: treat unparseable data as "no videos"
            media_asset_page = self._parse_json(
                media_asset, playlist_id, fatal=False) or {}
            medias = []
            for video in media_asset_page.get('videos', {}).values():
                medias.extend(video.values())

    if not medias:
        # Multiple video playlist with single `now playing` entry (e.g.
        # http://www.bbc.com/news/video_and_audio/must_see/33767813)
        vxp_playlist = self._parse_json(
            self._search_regex(
                r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
                webpage, 'playlist data'),
            playlist_id)
        playlist_medias = []
        for item in vxp_playlist:
            media = item.get('media')
            if not media:
                continue
            playlist_medias.append(media)
            # Download single video if found media with asset id matching the video id from URL
            if item.get('advert', {}).get('assetId') == playlist_id:
                medias = [media]
                break
        # Fallback to the whole playlist
        if not medias:
            medias = playlist_medias

    entries = []
    for num, media_meta in enumerate(medias, start=1):
        formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
        if not formats and not self.get_param('ignore_no_formats'):
            continue
        self._sort_formats(formats)

        video_id = media_meta.get('externalId')
        if not video_id:
            video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)

        title = media_meta.get('caption')
        if not title:
            title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)

        duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))

        images = []
        for image in media_meta.get('images', {}).values():
            images.extend(image.values())
        if 'image' in media_meta:
            images.append(media_meta['image'])

        thumbnails = [{
            'url': image.get('href'),
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in images]

        entries.append({
            'id': video_id,
            'title': title,
            'thumbnails': thumbnails,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        })

    return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
a65402ef
YCH
1340
1341
class BBCCoUkArticleIE(InfoExtractor):
    """Playlist extractor for bbc.co.uk programme articles that embed clips."""

    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of every clip resource referenced by the article."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # The description lookup may come back empty (None) when the page has
        # no og:description, so strip defensively instead of calling .strip().
        description = strip_or_none(self._og_search_description(webpage))

        # Each clip is delegated to whichever extractor matches its URL
        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
1370
1371
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    """Shared logic for paginated bbc.co.uk playlist pages.

    Subclasses provide ``_VIDEO_ID_TEMPLATE`` (a regex template that receives
    the programme-id regex), ``_URL_TEMPLATE`` (a URL template that receives a
    video id) and ``_extract_title_and_description(webpage)``.
    """

    def _entries(self, webpage, url, playlist_id):
        """Yield a url result for every video id found, following pagination.

        When *url* carries an explicit ``page`` query parameter only that
        page is processed; otherwise "next" links are followed until none
        is left.
        """
        single_page = 'page' in compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query)
        # The first page is already downloaded, so numbering starts at 2
        for page_num in itertools.count(2):
            for video_id in re.findall(
                    self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
            if single_page:
                return
            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page:
                break
            # Pass note/errnote by keyword: the bare page number used to land
            # in the positional error-note slot.
            webpage = self._download_webpage(
                compat_urlparse.urljoin(url, next_page), playlist_id,
                note='Downloading page %d' % page_num,
                errnote='Unable to download page %d' % page_num)

    def _real_extract(self, url):
        """Return a playlist whose entries are produced lazily by ``_entries``."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title, description = self._extract_title_and_description(webpage)

        return self.playlist_result(
            self._entries(webpage, url, playlist_id),
            playlist_id, title, description)
ded7511a
S
1402
1403
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    """Common paging logic for iPlayer playlists.

    Subclasses supply ``_PAGE_SIZE``, ``_DESCRIPTION_KEY`` and the hooks
    ``_call_api``, ``_get_elements``, ``_get_episode``,
    ``_get_episode_image``, ``_get_episode_field``, ``_get_playlist_data``
    and ``_get_playlist_title``.
    """

    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Look up ``episode[key][default_key]`` via ``try_get``."""
        return try_get(episode, lambda x: x[key][default_key])

    def _get_description(self, data):
        """Pick the first available synopsis among large, medium and small."""
        synopses = data.get(self._DESCRIPTION_KEY) or {}
        return dict_get(synopses, ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield url-type entries for the zero-based *page* of the listing."""
        # The API is called with a one-based page number
        api_data = self._call_api(programme_id, per_page, page + 1, series_id)
        for element in self._get_elements(api_data):
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                continue
            image = self._get_episode_image(episode)
            thumbnail = image.replace('{recipe}', 'raw') if image else None
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': thumbnail,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the playlist, either one explicit page or all pages lazily."""
        pid = self._match_id(url)
        query = parse_qs(url)
        series_id = query.get('seriesId', [None])[0]
        page = query.get('page', [None])[0]
        # An explicit ?page=N request uses a page size of 36
        per_page = 36 if page else self._PAGE_SIZE
        fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
        if page:
            entries = fetch_page(int(page) - 1)
        else:
            entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        # A one-item request is enough to read the playlist-level metadata
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        playlist_title = self._get_playlist_title(playlist_data)
        playlist_description = self._get_description(playlist_data)
        return self.playlist_result(
            entries, pid, playlist_title, playlist_description)
1452
1453
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    """iPlayer ``/episodes/`` listings, backed by the graph.ibl API."""

    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the ``default`` variant of the episode image."""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the ``default`` variant of an episode field."""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the result list of a programme response."""
        return data['entities']['results']

    @staticmethod
    def _get_episode(element):
        """Return the episode dict nested in a result element, or ``{}``."""
        return element.get('episode') or {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST the programme query and return its ``data.programme`` part."""
        variables = {'id': pid, 'page': page, 'perPage': per_page}
        if series_id:
            variables['sliceId'] = series_id
        request_body = json.dumps({
            'id': '5692d93d5aac8d796a0305e895e61551',
            'variables': variables,
        }).encode('utf-8')
        response = self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid,
            headers={'Content-Type': 'application/json'},
            data=request_body)
        return response['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """The programme response itself carries the playlist metadata."""
        return data

    def _get_playlist_title(self, data):
        """Return the ``default`` variant of the playlist title."""
        return self._get_default(data, 'title')
1542
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    """iPlayer ``/group/`` listings, backed by the ibl v1 groups API."""

    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the ``standard`` variant of the episode images."""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Group episodes expose their fields directly on the dict."""
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        """Return the element list of a group response."""
        return data['elements']

    @staticmethod
    def _get_episode(element):
        """Each element already is the episode dict."""
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """GET one page of group episodes; *series_id* is not used here."""
        paging = {
            'page': page,
            'per_page': per_page,
        }
        response = self._download_json(
            'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
            pid, query=paging)
        return response['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Playlist metadata lives under the ``group`` key."""
        return data['group']

    def _get_playlist_title(self, data):
        """Return the plain ``title`` value of the group."""
        return data.get('title')
ded7511a
S
1605
1606
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    """Programme episodes/broadcasts/clips listing pages on bbc.co.uk."""

    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return ``(title, description)`` from the page's Open Graph tags."""
        og_title = self._og_search_title(webpage, fatal=False)
        og_description = self._og_search_description(webpage)
        return og_title, og_description