]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bbc.py
[extractor] Add `_perform_login` function (#2943)
[yt-dlp.git] / yt_dlp / extractor / bbc.py
CommitLineData
9afa1770 1# coding: utf-8
082c6c86
S
2from __future__ import unicode_literals
3
1418a043 4import functools
254e64a2 5import itertools
1418a043 6import json
f0228f56 7import re
082c6c86 8
f13b1e7d 9from .common import InfoExtractor
3721515b 10from ..compat import (
11 compat_etree_Element,
12 compat_HTTPError,
1bdae7d3 13 compat_str,
50e93e03 14 compat_urllib_error,
3721515b 15 compat_urlparse,
16)
8683b4d8 17from ..utils import (
3721515b 18 ExtractorError,
1418a043 19 OnDemandPagedList,
97067db2 20 clean_html,
9fb64c04 21 dict_get,
9afa1770 22 float_or_none,
97067db2 23 get_element_by_class,
8683b4d8 24 int_or_none,
6d155707 25 js_to_json,
9afa1770
S
26 parse_duration,
27 parse_iso8601,
4dfbf869 28 parse_qs,
1bdae7d3 29 strip_or_none,
9fb64c04 30 try_get,
dab062fb 31 unescapeHTML,
1bdae7d3 32 unified_timestamp,
f0228f56 33 url_or_none,
97067db2
S
34 urlencode_postdata,
35 urljoin,
8683b4d8 36)
082c6c86 37
d12a1a47 38
f13b1e7d 39class BBCCoUkIE(InfoExtractor):
082c6c86 40 IE_NAME = 'bbc.co.uk'
2e3fd9ec 41 IE_DESC = 'BBC iPlayer'
50e93e03 42 _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
f20a11ed
S
43 _VALID_URL = r'''(?x)
44 https?://
45 (?:www\.)?bbc\.co\.uk/
46 (?:
47 programmes/(?!articles/)|
48 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
72d256c4 49 music/(?:clips|audiovideo/popular)[/#]|
d3d45e0a 50 radio/player/|
b72305f0 51 sounds/play/|
d3d45e0a 52 events/[^/]+/play/[^/]+/
f20a11ed 53 )
ded7511a 54 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
f20a11ed 55 ''' % _ID_REGEX
082c6c86 56
97067db2
S
57 _LOGIN_URL = 'https://account.bbc.com/signin'
58 _NETRC_MACHINE = 'bbc'
59
29f7c58a 60 _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
61 _MEDIA_SETS = [
26ccc68b
S
62 # Provides HQ HLS streams with even better quality than pc mediaset but fails
63 # with geolocation in some cases even when it's not geo restricted at all (e.g.
d781e293 64 # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
29f7c58a 65 'iptv-all',
66 'pc',
d12a1a47 67 ]
a8b081a0 68
e6174ee9
S
69 _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
70
2e3fd9ec
S
71 _TESTS = [
72 {
f2d0fc68 73 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
2e3fd9ec 74 'info_dict': {
f2d0fc68 75 'id': 'b039d07m',
b1ea6802 76 'ext': 'flv',
acc86c9a 77 'title': 'Kaleidoscope, Leonard Cohen',
c4914185 78 'description': 'The Canadian poet and songwriter reflects on his musical career.',
2e3fd9ec
S
79 },
80 'params': {
b1ea6802 81 # rtmp download
2e3fd9ec
S
82 'skip_download': True,
83 }
082c6c86 84 },
2e3fd9ec
S
85 {
86 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
87 'info_dict': {
88 'id': 'b00yng1d',
89 'ext': 'flv',
90 'title': 'The Man in Black: Series 3: The Printed Name',
91 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
92 'duration': 1800,
93 },
94 'params': {
95 # rtmp download
96 'skip_download': True,
c7f0177f
S
97 },
98 'skip': 'Episode is no longer available on BBC iPlayer Radio',
2e3fd9ec
S
99 },
100 {
101 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
102 'info_dict': {
103 'id': 'b00yng1d',
104 'ext': 'flv',
17968e44 105 'title': 'The Voice UK: Series 3: Blind Auditions 5',
611c1dd9 106 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
17968e44 107 'duration': 5100,
2e3fd9ec
S
108 },
109 'params': {
110 # rtmp download
111 'skip_download': True,
112 },
b1ea6802 113 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
c056efa2
S
114 },
115 {
116 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
117 'info_dict': {
118 'id': 'b03k3pb7',
119 'ext': 'flv',
120 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
121 'description': '2. Invasion',
122 'duration': 3600,
123 },
124 'params': {
125 # rtmp download
126 'skip_download': True,
127 },
b1ea6802 128 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
ae6986fb
S
129 }, {
130 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
131 'info_dict': {
132 'id': 'b04v209v',
133 'ext': 'flv',
134 'title': 'Pete Tong, The Essential New Tune Special',
135 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
136 'duration': 10800,
137 },
138 'params': {
139 # rtmp download
140 'skip_download': True,
a3ef0e1c
YCH
141 },
142 'skip': 'Episode is no longer available on BBC iPlayer Radio',
c7e67594 143 }, {
5aa535c3 144 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
c7e67594
S
145 'note': 'Audio',
146 'info_dict': {
5aa535c3 147 'id': 'p022h44j',
b1ea6802 148 'ext': 'flv',
5aa535c3
S
149 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
150 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
151 'duration': 227,
c7e67594
S
152 },
153 'params': {
b1ea6802 154 # rtmp download
c7e67594
S
155 'skip_download': True,
156 }
157 }, {
158 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
159 'note': 'Video',
160 'info_dict': {
161 'id': 'p025c103',
b1ea6802 162 'ext': 'flv',
c7e67594
S
163 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
164 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
165 'duration': 226,
166 },
167 'params': {
b1ea6802 168 # rtmp download
c7e67594
S
169 'skip_download': True,
170 }
e68ae99a
S
171 }, {
172 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
173 'info_dict': {
174 'id': 'p02n76xf',
175 'ext': 'flv',
176 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
177 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
178 'duration': 3540,
179 },
180 'params': {
181 # rtmp download
182 'skip_download': True,
183 },
b1ea6802 184 'skip': 'geolocation',
25fa8d66
YCH
185 }, {
186 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
187 'info_dict': {
188 'id': 'b05zmgw1',
189 'ext': 'flv',
190 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
191 'title': 'Royal Academy Summer Exhibition',
192 'duration': 3540,
193 },
194 'params': {
195 # rtmp download
196 'skip_download': True,
197 },
b1ea6802 198 'skip': 'geolocation',
54914380
S
199 }, {
200 # iptv-all mediaset fails with geolocation however there is no geo restriction
201 # for this programme at all
5aa535c3 202 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
54914380 203 'info_dict': {
5aa535c3 204 'id': 'b06rkms3',
54914380 205 'ext': 'flv',
5aa535c3
S
206 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
207 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
54914380
S
208 },
209 'params': {
210 # rtmp download
211 'skip_download': True,
212 },
b1ea6802 213 'skip': 'Now it\'s really geo-restricted',
1ac6e794 214 }, {
067aa17e 215 # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
1ac6e794
S
216 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
217 'info_dict': {
218 'id': 'p028bfkj',
b1ea6802 219 'ext': 'flv',
1ac6e794
S
220 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
221 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
222 },
223 'params': {
b1ea6802 224 # rtmp download
1ac6e794
S
225 'skip_download': True,
226 },
b72305f0
J
227 }, {
228 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
229 'note': 'Audio',
230 'info_dict': {
231 'id': 'm0007jz9',
232 'ext': 'mp4',
233 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
234 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
235 'duration': 9840,
236 },
237 'params': {
238 # rtmp download
239 'skip_download': True,
240 }
31763975
S
241 }, {
242 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
243 'only_matching': True,
c7e67594
S
244 }, {
245 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
246 'only_matching': True,
0692ef86
S
247 }, {
248 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
249 'only_matching': True,
f20a11ed
S
250 }, {
251 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
252 'only_matching': True,
72d256c4
S
253 }, {
254 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
255 'only_matching': True,
53647dfd
S
256 }, {
257 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
258 'only_matching': True,
6f356cbb
S
259 }, {
260 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
261 'only_matching': True,
262 }, {
263 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
264 'only_matching': True,
72d256c4 265 }]
2e3fd9ec 266
def _perform_login(self, username, password):
    """Sign in to the BBC account service with the given credentials.

    The sign-in page is fetched first so that its hidden form inputs can be
    posted back together with the username and password. If the response is
    still served from the sign-in URL the attempt is treated as failed and
    an ExtractorError is raised, carrying the page's ``form-message`` text
    when one is present.
    """
    signin_page = self._download_webpage(
        self._LOGIN_URL, None, 'Downloading signin page')

    form_data = self._hidden_inputs(signin_page)
    form_data.update({
        'username': username,
        'password': password,
    })

    # The form's action may be relative, so resolve it against the login URL.
    form_action = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', signin_page,
        'post url', default=self._LOGIN_URL, group='url')
    post_url = urljoin(self._LOGIN_URL, form_action)

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(form_data),
        headers={'Referer': self._LOGIN_URL})

    if self._LOGIN_URL not in urlh.geturl():
        # Redirected away from the sign-in page: nothing more to check.
        return

    error = clean_html(get_element_by_class('form-message', response))
    if error:
        raise ExtractorError(
            'Unable to login: %s' % error, expected=True)
    raise ExtractorError('Unable to log in')
292
d12a1a47
S
class MediaSelectionError(Exception):
    """Signals that a media selector response carried an error result.

    The error identifier is exposed as the ``id`` attribute.
    """

    def __init__(self, id):
        # ``id`` shadows the builtin, but the parameter name is kept so that
        # keyword callers keep working.
        Exception.__init__(self, id)
        self.id = id
296
2e3fd9ec
S
297 def _extract_asx_playlist(self, connection, programme_id):
298 asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
299 return [ref.get('href') for ref in asx.findall('./Entry/ref')]
300
2e3fd9ec 301 def _extract_items(self, playlist):
e6174ee9
S
302 return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
303
2e3fd9ec 304 def _extract_medias(self, media_selection):
29f7c58a 305 error = media_selection.get('result')
306 if error:
307 raise BBCCoUkIE.MediaSelectionError(error)
308 return media_selection.get('media') or []
2e3fd9ec
S
309
310 def _extract_connections(self, media):
29f7c58a 311 return media.get('connection') or []
2e3fd9ec 312
def _get_subtitles(self, media, programme_id):
    """Build the subtitles dict for a ``captions`` media entry.

    Each connection is tried in turn. The first one whose ``href`` passes
    ``url_or_none`` and downloads as XML is recorded as the English TTML
    track, and the search stops there. Returns an empty dict when no
    connection qualifies.
    """
    subtitles = {}
    for connection in self._extract_connections(media):
        cc_url = url_or_none(connection.get('href'))
        if not cc_url:
            continue
        # The download is non-fatal, so a failed request must be detected by
        # checking the type of the result.
        captions = self._download_xml(
            cc_url, programme_id, 'Downloading captions', fatal=False)
        if not isinstance(captions, compat_etree_Element):
            continue
        subtitles['en'] = [
            {
                # Emit the validated URL that was just fetched rather than
                # the raw attribute value.
                'url': cc_url,
                'ext': 'ttml',
            },
        ]
        break
    return subtitles
082c6c86 331
d12a1a47
S
def _raise_extractor_error(self, media_selection_error):
    """Raise an expected ExtractorError naming this extractor and the error's ``id``."""
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    raise ExtractorError(message, expected=True)
336
c056efa2 337 def _download_media_selector(self, programme_id):
d12a1a47 338 last_exception = None
29f7c58a 339 for media_set in self._MEDIA_SETS:
d12a1a47
S
340 try:
341 return self._download_media_selector_url(
29f7c58a 342 self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
d12a1a47 343 except BBCCoUkIE.MediaSelectionError as e:
d781e293 344 if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
d12a1a47
S
345 last_exception = e
346 continue
347 self._raise_extractor_error(e)
348 self._raise_extractor_error(last_exception)
9afa1770
S
349
350 def _download_media_selector_url(self, url, programme_id=None):
29f7c58a 351 media_selection = self._download_json(
352 url, programme_id, 'Downloading media selection JSON',
9283d4ea 353 expected_status=(403, 404))
9afa1770 354 return self._process_media_selector(media_selection, programme_id)
082c6c86 355
9afa1770 356 def _process_media_selector(self, media_selection, programme_id):
082c6c86 357 formats = []
2e3fd9ec 358 subtitles = None
b0af1215 359 urls = []
2e3fd9ec 360
c056efa2
S
361 for media in self._extract_medias(media_selection):
362 kind = media.get('kind')
a7e5f274
RA
363 if kind in ('video', 'audio'):
364 bitrate = int_or_none(media.get('bitrate'))
365 encoding = media.get('encoding')
a7e5f274
RA
366 width = int_or_none(media.get('width'))
367 height = int_or_none(media.get('height'))
368 file_size = int_or_none(media.get('media_file_size'))
369 for connection in self._extract_connections(media):
b0af1215
RA
370 href = connection.get('href')
371 if href in urls:
372 continue
373 if href:
374 urls.append(href)
a7e5f274
RA
375 conn_kind = connection.get('kind')
376 protocol = connection.get('protocol')
377 supplier = connection.get('supplier')
a7e5f274
RA
378 transfer_format = connection.get('transferFormat')
379 format_id = supplier or conn_kind or protocol
a7e5f274
RA
380 # ASX playlist
381 if supplier == 'asx':
382 for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
383 formats.append({
384 'url': ref,
385 'format_id': 'ref%s_%s' % (i, format_id),
386 })
387 elif transfer_format == 'dash':
388 formats.extend(self._extract_mpd_formats(
389 href, programme_id, mpd_id=format_id, fatal=False))
390 elif transfer_format == 'hls':
50e93e03 391 # TODO: let expected_status be passed into _extract_xxx_formats() instead
392 try:
393 fmts = self._extract_m3u8_formats(
394 href, programme_id, ext='mp4', entry_protocol='m3u8_native',
395 m3u8_id=format_id, fatal=False)
396 except ExtractorError as e:
397 if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
398 and e.exc_info[1].code in (403, 404)):
399 raise
400 fmts = []
401 formats.extend(fmts)
a7e5f274
RA
402 elif transfer_format == 'hds':
403 formats.extend(self._extract_f4m_formats(
404 href, programme_id, f4m_id=format_id, fatal=False))
405 else:
29f7c58a 406 if not supplier and bitrate:
aaa42cf0 407 format_id += '-%d' % bitrate
a7e5f274
RA
408 fmt = {
409 'format_id': format_id,
410 'filesize': file_size,
411 }
412 if kind == 'video':
413 fmt.update({
414 'width': width,
415 'height': height,
6240925b 416 'tbr': bitrate,
a7e5f274
RA
417 'vcodec': encoding,
418 })
419 else:
420 fmt.update({
421 'abr': bitrate,
422 'acodec': encoding,
423 'vcodec': 'none',
424 })
1af959ef 425 if protocol in ('http', 'https'):
a7e5f274
RA
426 # Direct link
427 fmt.update({
428 'url': href,
429 })
430 elif protocol == 'rtmp':
431 application = connection.get('application', 'ondemand')
432 auth_string = connection.get('authString')
433 identifier = connection.get('identifier')
434 server = connection.get('server')
435 fmt.update({
436 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
437 'play_path': identifier,
438 'app': '%s?%s' % (application, auth_string),
439 'page_url': 'http://www.bbc.co.uk',
440 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
441 'rtmp_live': False,
442 'ext': 'flv',
443 })
964744af
S
444 else:
445 continue
a7e5f274 446 formats.append(fmt)
c056efa2 447 elif kind == 'captions':
f13b1e7d 448 subtitles = self.extract_subtitles(media, programme_id)
c056efa2 449 return formats, subtitles
2e3fd9ec 450
ae6986fb
S
451 def _download_playlist(self, playlist_id):
452 try:
453 playlist = self._download_json(
454 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
455 playlist_id, 'Downloading playlist JSON')
c45b8741 456 formats = []
457 subtitles = {}
ae6986fb 458
c45b8741 459 for version in playlist.get('allAvailableVersions', []):
ae6986fb
S
460 smp_config = version['smpConfig']
461 title = smp_config['title']
462 description = smp_config['summary']
463 for item in smp_config['items']:
464 kind = item['kind']
40fcba5e 465 if kind not in ('programme', 'radioProgramme'):
ae6986fb
S
466 continue
467 programme_id = item.get('vpid')
d97f5cd7 468 duration = int_or_none(item.get('duration'))
c45b8741 469 version_formats, version_subtitles = self._download_media_selector(programme_id)
470 types = version['types']
471 for f in version_formats:
472 f['format_note'] = ', '.join(types)
473 if any('AudioDescribed' in x for x in types):
474 f['language_preference'] = -10
475 formats += version_formats
476 for tag, subformats in (version_subtitles or {}).items():
f304da8a 477 subtitles.setdefault(tag, []).extend(subformats)
c45b8741 478
479 return programme_id, title, description, duration, formats, subtitles
ae6986fb 480 except ExtractorError as ee:
f813928e 481 if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
ae6986fb
S
482 raise
483
484 # fallback to legacy playlist
9afa1770
S
485 return self._process_legacy_playlist(playlist_id)
486
487 def _process_legacy_playlist_url(self, url, display_id):
488 playlist = self._download_legacy_playlist_url(url, display_id)
489 return self._extract_from_legacy_playlist(playlist, display_id)
490
491 def _process_legacy_playlist(self, playlist_id):
492 return self._process_legacy_playlist_url(
493 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
494
495 def _download_legacy_playlist_url(self, url, playlist_id=None):
496 return self._download_xml(
497 url, playlist_id, 'Downloading legacy playlist XML')
ae6986fb 498
9afa1770 499 def _extract_from_legacy_playlist(self, playlist, playlist_id):
e6174ee9 500 no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
ae6986fb
S
501 if no_items is not None:
502 reason = no_items.get('reason')
503 if reason == 'preAvailability':
504 msg = 'Episode %s is not yet available' % playlist_id
505 elif reason == 'postAvailability':
506 msg = 'Episode %s is no longer available' % playlist_id
507 elif reason == 'noMedia':
508 msg = 'Episode %s is not currently available' % playlist_id
509 else:
510 msg = 'Episode %s is not available: %s' % (playlist_id, reason)
511 raise ExtractorError(msg, expected=True)
512
513 for item in self._extract_items(playlist):
514 kind = item.get('kind')
40fcba5e 515 if kind not in ('programme', 'radioProgramme'):
ae6986fb 516 continue
e6174ee9
S
517 title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
518 description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
8daeeedc 519 description = description_el.text if description_el is not None else None
9afa1770
S
520
521 def get_programme_id(item):
522 def get_from_attributes(item):
32759325 523 for p in ('identifier', 'group'):
9afa1770
S
524 value = item.get(p)
525 if value and re.match(r'^[pb][\da-z]{7}$', value):
526 return value
527 get_from_attributes(item)
e6174ee9 528 mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
9afa1770
S
529 if mediator is not None:
530 return get_from_attributes(mediator)
531
532 programme_id = get_programme_id(item)
d97f5cd7 533 duration = int_or_none(item.get('duration'))
e6174ee9
S
534
535 if programme_id:
536 formats, subtitles = self._download_media_selector(programme_id)
537 else:
538 formats, subtitles = self._process_media_selector(item, playlist_id)
539 programme_id = playlist_id
ae6986fb
S
540
541 return programme_id, title, description, duration, formats, subtitles
542
c056efa2
S
543 def _real_extract(self, url):
544 group_id = self._match_id(url)
545
546 webpage = self._download_webpage(url, group_id, 'Downloading video page')
547
b2ed954f 548 error = self._search_regex(
29f7c58a 549 r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
b2ed954f
S
550 webpage, 'error', default=None)
551 if error:
552 raise ExtractorError(error, expected=True)
553
8683b4d8 554 programme_id = None
679bacf0 555 duration = None
8683b4d8
S
556
557 tviplayer = self._search_regex(
558 r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
559 webpage, 'player', default=None)
560
561 if tviplayer:
562 player = self._parse_json(tviplayer, group_id).get('player', {})
563 duration = int_or_none(player.get('duration'))
564 programme_id = player.get('vpid')
565
566 if not programme_id:
567 programme_id = self._search_regex(
22d7368d 568 r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
8683b4d8 569
c056efa2 570 if programme_id:
c056efa2 571 formats, subtitles = self._download_media_selector(programme_id)
88fb59d9 572 title = self._og_search_title(webpage, default=None) or self._html_search_regex(
50e989e2
S
573 (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
574 r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
8683b4d8 575 description = self._search_regex(
a8534274
S
576 (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
577 r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
679bacf0
YCH
578 webpage, 'description', default=None)
579 if not description:
580 description = self._html_search_meta('description', webpage)
c056efa2 581 else:
ae6986fb 582 programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
2e3fd9ec 583
082c6c86
S
584 self._sort_formats(formats)
585
586 return {
2e3fd9ec 587 'id': programme_id,
082c6c86
S
588 'title': title,
589 'description': description,
650cfd0c 590 'thumbnail': self._og_search_thumbnail(webpage, default=None),
082c6c86
S
591 'duration': duration,
592 'formats': formats,
2e3fd9ec 593 'subtitles': subtitles,
5f6a1245 594 }
10273d6e 595
596
9afa1770
S
597class BBCIE(BBCCoUkIE):
598 IE_NAME = 'bbc'
599 IE_DESC = 'BBC'
600 _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
10273d6e 601
29f7c58a 602 _MEDIA_SETS = [
29f7c58a 603 'pc',
2d997542 604 'mobile-tablet-main',
d12a1a47 605 ]
10273d6e 606
607 _TESTS = [{
6a747190 608 # article with multiple videos embedded with data-playable containing vpids
10273d6e 609 'url': 'http://www.bbc.com/news/world-europe-32668511',
610 'info_dict': {
611 'id': 'world-europe-32668511',
acc86c9a 612 'title': 'Russia stages massive WW2 parade',
9afa1770 613 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
10273d6e 614 },
615 'playlist_count': 2,
a3bfddfa 616 }, {
6a747190 617 # article with multiple videos embedded with data-playable (more videos)
10273d6e 618 'url': 'http://www.bbc.com/news/business-28299555',
619 'info_dict': {
620 'id': 'business-28299555',
621 'title': 'Farnborough Airshow: Video highlights',
9afa1770 622 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
10273d6e 623 },
624 'playlist_count': 9,
9afa1770 625 'skip': 'Save time',
88ed52ae
S
626 }, {
627 # article with multiple videos embedded with `new SMP()`
6a747190 628 # broken
88ed52ae
S
629 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
630 'info_dict': {
631 'id': '3662a707-0af9-3149-963f-47bea720b460',
b7d7674f 632 'title': 'BUGGER',
88ed52ae
S
633 },
634 'playlist_count': 18,
a3bfddfa 635 }, {
6a747190 636 # single video embedded with data-playable containing vpid
10273d6e 637 'url': 'http://www.bbc.com/news/world-europe-32041533',
10273d6e 638 'info_dict': {
639 'id': 'p02mprgb',
55ebae26 640 'ext': 'mp4',
10273d6e 641 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
55ebae26 642 'description': 'md5:2868290467291b37feda7863f7a83f54',
10273d6e 643 'duration': 47,
9afa1770 644 'timestamp': 1427219242,
da92eeae 645 'upload_date': '20150324',
10273d6e 646 },
647 'params': {
9afa1770 648 # rtmp download
10273d6e 649 'skip_download': True,
650 }
a3bfddfa 651 }, {
6a747190
S
652 # article with single video embedded with data-playable containing XML playlist
653 # with direct video links as progressiveDownloadUrl (for now these are extracted)
654 # and playlist with f4m and m3u8 as streamingUrl
de939d89 655 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
de939d89 656 'info_dict': {
9afa1770 657 'id': '150615_telabyad_kentin_cogu',
de939d89 658 'ext': 'mp4',
ad152e2d 659 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
05087d1b 660 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
9afa1770 661 'timestamp': 1434397334,
da92eeae 662 'upload_date': '20150615',
de939d89 663 },
664 'params': {
665 'skip_download': True,
666 }
c936d8cc 667 }, {
6a747190 668 # single video embedded with data-playable containing XML playlists (regional section)
de939d89 669 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 670 'info_dict': {
9afa1770 671 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 672 'ext': 'mp4',
9afa1770 673 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
05087d1b 674 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
9afa1770 675 'timestamp': 1434713142,
da92eeae 676 'upload_date': '20150619',
de939d89 677 },
678 'params': {
679 'skip_download': True,
680 }
a346b1ff
S
681 }, {
682 # single video from video playlist embedded with vxp-playlist-data JSON
683 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
684 'info_dict': {
685 'id': 'p02w6qjc',
55ebae26 686 'ext': 'mp4',
a346b1ff
S
687 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
688 'duration': 56,
0bc4ee60 689 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
a346b1ff
S
690 },
691 'params': {
692 'skip_download': True,
693 }
9afa1770
S
694 }, {
695 # single video story with digitalData
696 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
697 'info_dict': {
698 'id': 'p02q6gc4',
699 'ext': 'flv',
700 'title': 'Sri Lanka’s spicy secret',
701 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
702 'timestamp': 1437674293,
703 'upload_date': '20150723',
704 },
705 'params': {
706 # rtmp download
707 'skip_download': True,
708 }
709 }, {
710 # single video story without digitalData
711 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
712 'info_dict': {
713 'id': 'p018zqqg',
55ebae26 714 'ext': 'mp4',
9afa1770
S
715 'title': 'Hyundai Santa Fe Sport: Rock star',
716 'description': 'md5:b042a26142c4154a6e472933cf20793d',
ae8bdfd1
S
717 'timestamp': 1415867444,
718 'upload_date': '20141113',
9afa1770
S
719 },
720 'params': {
721 # rtmp download
722 'skip_download': True,
723 }
9fb64c04
S
724 }, {
725 # single video embedded with Morph
726 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
727 'info_dict': {
728 'id': 'p041vhd0',
729 'ext': 'mp4',
730 'title': "Nigeria v Japan - Men's First Round",
731 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
732 'duration': 7980,
733 'uploader': 'BBC Sport',
734 'uploader_id': 'bbc_sport',
735 },
736 'params': {
737 # m3u8 download
738 'skip_download': True,
9fb64c04
S
739 },
740 'skip': 'Georestricted to UK',
9afa1770 741 }, {
6a747190 742 # single video with playlist.sxml URL in playlist param
9afa1770
S
743 'url': 'http://www.bbc.com/sport/0/football/33653409',
744 'info_dict': {
745 'id': 'p02xycnp',
55ebae26 746 'ext': 'mp4',
9afa1770 747 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
7033bc1a 748 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
9afa1770
S
749 'duration': 140,
750 },
751 'params': {
752 # rtmp download
753 'skip_download': True,
754 }
b5d48cb1 755 }, {
6a747190 756 # article with multiple videos embedded with playlist.sxml in playlist param
b5d48cb1
S
757 'url': 'http://www.bbc.com/sport/0/football/34475836',
758 'info_dict': {
759 'id': '34475836',
450b233c 760 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
8c65e4a5 761 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
b5d48cb1
S
762 },
763 'playlist_count': 3,
450b233c
S
764 }, {
765 # school report article with single video
766 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
767 'info_dict': {
768 'id': '35744779',
769 'title': 'School which breaks down barriers in Jerusalem',
770 },
771 'playlist_count': 1,
9afa1770
S
772 }, {
773 # single video with playlist URL from weather section
774 'url': 'http://www.bbc.com/weather/features/33601775',
775 'only_matching': True,
776 }, {
777 # custom redirection to www.bbc.com
1bdae7d3 778 # also, video with window.__INITIAL_DATA__
9afa1770 779 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
1bdae7d3 780 'info_dict': {
781 'id': 'p02xzws1',
782 'ext': 'mp4',
783 'title': "Pluto may have 'nitrogen glaciers'",
784 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
785 'thumbnail': r're:https?://.+/.+\.jpg',
786 'timestamp': 1437785037,
787 'upload_date': '20150725',
788 },
50e93e03 789 }, {
790 # video with window.__INITIAL_DATA__ and value as JSON string
791 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
792 'info_dict': {
793 'id': 'p0b71qth',
794 'ext': 'mp4',
795 'title': 'Why France is making this woman a national hero',
796 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
797 'thumbnail': r're:https?://.+/.+\.jpg',
798 'timestamp': 1638230731,
799 'upload_date': '20211130',
800 },
a1cf3e38
S
801 }, {
802 # single video article embedded with data-media-vpid
803 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
804 'only_matching': True,
6d155707 805 }, {
50e93e03 806 # bbcthreeConfig
6d155707
S
807 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
808 'info_dict': {
809 'id': 'p06556y7',
810 'ext': 'mp4',
50e93e03 811 'title': 'Things Not To Say to people that live on council estates',
812 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
813 'duration': 360,
814 'thumbnail': r're:https?://.+/.+\.jpg',
6d155707 815 },
b96b4be4
RA
816 }, {
817 # window.__PRELOADED_STATE__
818 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
819 'info_dict': {
820 'id': 'b0b9z4vz',
821 'ext': 'mp4',
822 'title': 'Prom 6: An American in Paris and Turangalila',
823 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
824 'uploader': 'Radio 3',
825 'uploader_id': 'bbc_radio_three',
826 },
373941c5
S
827 }, {
828 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
829 'info_dict': {
830 'id': 'p06w9tws',
831 'ext': 'mp4',
832 'title': 'md5:2fabf12a726603193a2879a055f72514',
833 'description': 'Learn English words and phrases from this story',
834 },
835 'add_ie': [BBCCoUkIE.ie_key()],
3721515b 836 }, {
837 # BBC Reel
838 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
839 'info_dict': {
840 'id': 'p07c6sb9',
841 'ext': 'mp4',
842 'title': 'How positive thinking is harming your happiness',
843 'alt_title': 'The downsides of positive thinking',
844 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
845 'duration': 235,
846 'thumbnail': r're:https?://.+/p07c9dsr.jpg',
847 'upload_date': '20190604',
848 'categories': ['Psychology'],
849 },
10273d6e 850 }]
851
9afa1770
S
@classmethod
def suitable(cls, url):
    """Tell whether this generic BBC extractor should handle *url*.

    The URL is rejected as soon as any of the more specific BBC extractors
    listed below accepts it; otherwise the decision is delegated to the
    parent class implementation.
    """
    dedicated_extractors = (
        BBCCoUkIE,
        BBCCoUkArticleIE,
        BBCCoUkIPlayerEpisodesIE,
        BBCCoUkIPlayerGroupIE,
        BBCCoUkPlaylistIE,
    )
    for extractor in dedicated_extractors:
        if extractor.suitable(url):
            return False
    return super(BBCIE, cls).suitable(url)
9afa1770
S
857
858 def _extract_from_media_meta(self, media_meta, video_id):
859 # Direct links to media in media metadata (e.g.
860 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
861 # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
862 source_files = media_meta.get('sourceFiles')
863 if source_files:
864 return [{
865 'url': f['url'],
866 'format_id': format_id,
867 'ext': f.get('encoding'),
868 'tbr': float_or_none(f.get('bitrate'), 1000),
869 'filesize': int_or_none(f.get('filesize')),
870 } for format_id, f in source_files.items() if f.get('url')], []
871
872 programme_id = media_meta.get('externalId')
873 if programme_id:
874 return self._download_media_selector(programme_id)
875
876 # Process playlist.sxml as legacy playlist
877 href = media_meta.get('href')
878 if href:
879 playlist = self._download_legacy_playlist_url(href)
880 _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
881 return formats, subtitles
882
883 return [], []
884
baf39a1a
S
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
    """Build an info dict for the legacy playlist.sxml located at *url*.

    The playlist is resolved through ``_process_legacy_playlist_url``, its
    formats are sorted, and the caller-supplied *timestamp* is attached to
    the resulting entry unchanged.
    """
    legacy_info = self._process_legacy_playlist_url(url, playlist_id)
    programme_id, title, description, duration, formats, subtitles = legacy_info
    self._sort_formats(formats)

    entry = {
        'id': programme_id,
        'title': title,
        'description': description,
        'duration': duration,
        'timestamp': timestamp,
        'formats': formats,
        'subtitles': subtitles,
    }
    return entry
898
def _real_extract(self, url):
    """Extract the video(s) embedded in a generic BBC page.

    The page is downloaded once and then probed with a series of embed
    strategies, in the order they appear below.  The first strategy that
    finds media returns immediately, either as a single info dict, a
    ``url_result`` pointing at ``BBCCoUkIE``, or a playlist result.  The
    last strategy (``vxp-playlist-data``) is searched without a default,
    so a page matching nothing ends there.

    Page-level title, description and timestamp are collected first
    (JSON-LD, then OpenGraph/HTML fallbacks) and reused by the strategies
    that do not carry their own metadata.
    """
    playlist_id = self._match_id(url)

    webpage = self._download_webpage(url, playlist_id)

    json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
    timestamp = json_ld_info.get('timestamp')

    playlist_title = json_ld_info.get('title')
    if not playlist_title:
        playlist_title = self._og_search_title(
            webpage, default=None) or self._html_search_regex(
            r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
        if playlist_title:
            # Drop the trailing " - BBC ..." site suffix from the page title
            playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()

    playlist_description = json_ld_info.get(
        'description') or self._og_search_description(webpage, default=None)

    if not timestamp:
        timestamp = parse_iso8601(self._search_regex(
            [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
             r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
             r'"datePublished":\s*"([^"]+)'],
            webpage, 'date', default=None))

    entries = []

    # article with multiple videos embedded with playlist.sxml (e.g.
    # http://www.bbc.com/sport/0/football/34475836)
    playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
    playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
    if playlists:
        entries = [
            self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
            for playlist_url in playlists]

    # news article with multiple videos embedded with data-playable
    data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
    if data_playables:
        for _, data_playable_json in data_playables:
            data_playable = self._parse_json(
                unescapeHTML(data_playable_json), playlist_id, fatal=False)
            if not data_playable:
                continue
            settings = data_playable.get('settings', {})
            if settings:
                # data-playable with video vpid in settings.playlistObject.items (e.g.
                # http://www.bbc.com/news/world-us-canada-34473351)
                playlist_object = settings.get('playlistObject', {})
                if playlist_object:
                    items = playlist_object.get('items')
                    if items and isinstance(items, list):
                        title = playlist_object['title']
                        description = playlist_object.get('summary')
                        # Only the first item of the playlist object is used
                        duration = int_or_none(items[0].get('duration'))
                        programme_id = items[0].get('vpid')
                        formats, subtitles = self._download_media_selector(programme_id)
                        self._sort_formats(formats)
                        entries.append({
                            'id': programme_id,
                            'title': title,
                            'description': description,
                            'timestamp': timestamp,
                            'duration': duration,
                            'formats': formats,
                            'subtitles': subtitles,
                        })
            else:
                # data-playable without vpid but with playlist.sxml URLs
                # in otherSettings.playlist (e.g.
                # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
                playlist = data_playable.get('otherSettings', {}).get('playlist', {})
                if playlist:
                    entry = None
                    for key in ('streaming', 'progressiveDownload'):
                        playlist_url = playlist.get('%sUrl' % key)
                        if not playlist_url:
                            continue
                        try:
                            info = self._extract_from_playlist_sxml(
                                playlist_url, playlist_id, timestamp)
                            if not entry:
                                entry = info
                            else:
                                # Merge the second playlist into the first entry
                                entry['title'] = info['title']
                                entry['formats'].extend(info['formats'])
                        except ExtractorError as e:
                            # Some playlist URL may fail with 500, at the same time
                            # the other one may work fine (e.g.
                            # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
                            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
                                continue
                            raise
                    if entry:
                        self._sort_formats(entry['formats'])
                        entries.append(entry)

    if entries:
        return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)

    # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
    group_id = self._search_regex(
        r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
        webpage, 'group id', default=None)
    if group_id:
        return self.url_result(
            'https://www.bbc.co.uk/programmes/%s' % group_id,
            ie=BBCCoUkIE.ie_key())

    # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
    programme_id = self._search_regex(
        [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
         r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
         r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
        webpage, 'vpid', default=None)

    if programme_id:
        formats, subtitles = self._download_media_selector(programme_id)
        self._sort_formats(formats)
        # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
        # The parse is non-fatal, so fall back to an empty dict instead
        # of calling .get() on a failed (None) result.
        digital_data = self._parse_json(
            self._search_regex(
                r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
            programme_id, fatal=False) or {}
        page_info = digital_data.get('page', {}).get('pageInfo', {})
        title = page_info.get('pageName') or self._og_search_title(webpage)
        description = page_info.get('description') or self._og_search_description(webpage)
        timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
        return {
            'id': programme_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        }

    # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
    initial_data = self._parse_json(self._html_search_regex(
        r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
        webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
    if initial_data:
        init_data = try_get(
            initial_data, lambda x: x['initData']['items'][0], dict) or {}
        smp_data = init_data.get('smpData') or {}
        clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
        version_id = clip_data.get('versionID')
        if version_id:
            title = smp_data['title']
            formats, subtitles = self._download_media_selector(version_id)
            self._sort_formats(formats)
            image_url = smp_data.get('holdingImageURL')
            display_date = init_data.get('displayDate')
            topic_title = init_data.get('topicTitle')

            return {
                'id': version_id,
                'title': title,
                'formats': formats,
                'alt_title': init_data.get('shortTitle'),
                'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
                'description': smp_data.get('summary') or init_data.get('shortSummary'),
                'upload_date': display_date.replace('-', '') if display_date else None,
                'subtitles': subtitles,
                'duration': int_or_none(clip_data.get('duration')),
                'categories': [topic_title] if topic_title else None,
            }

    # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
    # Several setPayload calls may be present but the video
    # seems to be always related to the first one
    morph_payload = self._parse_json(
        self._search_regex(
            r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
            webpage, 'morph payload', default='{}'),
        playlist_id, fatal=False)
    if morph_payload:
        components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
        for component in components:
            if not isinstance(component, dict):
                continue
            lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
            if not lead_media:
                continue
            identifiers = lead_media.get('identifiers')
            if not identifiers or not isinstance(identifiers, dict):
                continue
            programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
            if not programme_id:
                continue
            title = lead_media.get('title') or self._og_search_title(webpage)
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            description = lead_media.get('summary')
            uploader = lead_media.get('masterBrand')
            uploader_id = lead_media.get('mid')
            duration = None
            duration_d = lead_media.get('duration')
            if isinstance(duration_d, dict):
                duration = parse_duration(dict_get(
                    duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
            return {
                'id': programme_id,
                'title': title,
                'description': description,
                'duration': duration,
                'uploader': uploader,
                'uploader_id': uploader_id,
                'formats': formats,
                'subtitles': subtitles,
            }

    # window.__PRELOADED_STATE__ embed
    preload_state = self._parse_json(self._search_regex(
        r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
        'preload state', default='{}'), playlist_id, fatal=False)
    if preload_state:
        current_programme = preload_state.get('programmes', {}).get('current') or {}
        programme_id = current_programme.get('id')
        if current_programme and programme_id and current_programme.get('type') == 'playable_item':
            title = current_programme.get('titles', {}).get('tertiary') or playlist_title
            formats, subtitles = self._download_media_selector(programme_id)
            self._sort_formats(formats)
            synopses = current_programme.get('synopses') or {}
            network = current_programme.get('network') or {}
            duration = int_or_none(
                current_programme.get('duration', {}).get('value'))
            thumbnail = None
            image_url = current_programme.get('image_url')
            if image_url:
                thumbnail = image_url.replace('{recipe}', 'raw')
            return {
                'id': programme_id,
                'title': title,
                'description': dict_get(synopses, ('long', 'medium', 'short')),
                'thumbnail': thumbnail,
                'duration': duration,
                'uploader': network.get('short_title'),
                'uploader_id': network.get('id'),
                'formats': formats,
                'subtitles': subtitles,
            }

    # bbcthreeConfig embed: either a single current clip or a playlist
    bbc3_config = self._parse_json(
        self._search_regex(
            r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
            'bbcthree config', default='{}'),
        playlist_id, transform_source=js_to_json, fatal=False) or {}
    payload = bbc3_config.get('payload') or {}
    if payload:
        clip = payload.get('currentClip') or {}
        clip_vpid = clip.get('vpid')
        clip_title = clip.get('title')
        if clip_vpid and clip_title:
            formats, subtitles = self._download_media_selector(clip_vpid)
            self._sort_formats(formats)
            return {
                'id': clip_vpid,
                'title': clip_title,
                'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
                'description': clip.get('description'),
                'duration': parse_duration(clip.get('duration')),
                'formats': formats,
                'subtitles': subtitles,
            }
        bbc3_playlist = try_get(
            payload, lambda x: x['content']['bbcMedia']['playlist'],
            dict)
        if bbc3_playlist:
            playlist_title = bbc3_playlist.get('title') or playlist_title
            thumbnail = bbc3_playlist.get('holdingImageURL')
            entries = []
            for bbc3_item in bbc3_playlist['items']:
                programme_id = bbc3_item.get('versionID')
                if not programme_id:
                    continue
                formats, subtitles = self._download_media_selector(programme_id)
                self._sort_formats(formats)
                entries.append({
                    'id': programme_id,
                    'title': playlist_title,
                    'thumbnail': thumbnail,
                    'timestamp': timestamp,
                    'formats': formats,
                    'subtitles': subtitles,
                })
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)

    # window.__INITIAL_DATA__ comes either as a plain JSON object or as a
    # JSON string literal that itself contains the JSON document.
    initial_data = self._search_regex(
        r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
        'quoted preload state', default=None)
    if initial_data is None:
        # The fallback must be a JSON *string*: the value is passed to
        # _parse_json below, and a dict is not parseable JSON text.
        initial_data = self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
            'preload state', default='{}')
    else:
        # Decode the outer string literal first; if that fails (non-fatal
        # parse returns nothing) continue with an empty JSON object.
        initial_data = self._parse_json(
            initial_data or '"{}"', playlist_id, fatal=False) or '{}'
    initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
    if initial_data:
        def parse_media(media):
            # Append one entry to the enclosing ``entries`` list for every
            # item of ``media`` that has both an id and a title.
            if not media:
                return
            for item in (try_get(media, lambda x: x['media']['items'], list) or []):
                item_id = item.get('id')
                item_title = item.get('title')
                if not (item_id and item_title):
                    continue
                formats, subtitles = self._download_media_selector(item_id)
                self._sort_formats(formats)
                item_desc = None
                blocks = try_get(media, lambda x: x['summary']['blocks'], list)
                if blocks:
                    summary = []
                    for block in blocks:
                        text = try_get(block, lambda x: x['model']['text'], compat_str)
                        if text:
                            summary.append(text)
                    if summary:
                        item_desc = '\n\n'.join(summary)
                item_time = None
                for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
                    if try_get(meta, lambda x: x['label']) == 'Published':
                        item_time = unified_timestamp(meta.get('timestamp'))
                        break
                entries.append({
                    'id': item_id,
                    'title': item_title,
                    'thumbnail': item.get('holdingImageUrl'),
                    'formats': formats,
                    'subtitles': subtitles,
                    'timestamp': item_time,
                    'description': strip_or_none(item_desc),
                })
        for resp in (initial_data.get('data') or {}).values():
            name = resp.get('name')
            if name == 'media-experience':
                parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
            elif name == 'article':
                for block in (try_get(resp,
                                      (lambda x: x['data']['blocks'],
                                       lambda x: x['data']['content']['model']['blocks'],),
                                      list) or []):
                    if block.get('type') != 'media':
                        continue
                    parse_media(block.get('model'))
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)

    def extract_all(pattern):
        # Parse every match of ``pattern`` as JSON, dropping failed/empty results
        return list(filter(None, map(
            lambda s: self._parse_json(s, playlist_id, fatal=False),
            re.findall(pattern, webpage))))

    # Multiple video article (e.g.
    # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
    EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
    entries = []
    for match in extract_all(r'new\s+SMP\(({.+?})\)'):
        embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
        if embed_url and re.match(EMBED_URL, embed_url):
            entries.append(embed_url)
    entries.extend(re.findall(
        r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
    if entries:
        return self.playlist_result(
            [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
            playlist_id, playlist_title, playlist_description)

    # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
    medias = extract_all(r"data-media-meta='({[^']+})'")

    if not medias:
        # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
        media_asset = self._search_regex(
            r'mediaAssetPage\.init\(\s*({.+?}), "/',
            webpage, 'media asset', default=None)
        if media_asset:
            # Non-fatal parse: treat unparseable data as "no videos"
            media_asset_page = self._parse_json(
                media_asset, playlist_id, fatal=False) or {}
            medias = []
            for video in media_asset_page.get('videos', {}).values():
                medias.extend(video.values())

    if not medias:
        # Multiple video playlist with single `now playing` entry (e.g.
        # http://www.bbc.com/news/video_and_audio/must_see/33767813)
        vxp_playlist = self._parse_json(
            self._search_regex(
                r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
                webpage, 'playlist data'),
            playlist_id)
        playlist_medias = []
        for item in vxp_playlist:
            media = item.get('media')
            if not media:
                continue
            playlist_medias.append(media)
            # Download single video if found media with asset id matching the video id from URL
            if item.get('advert', {}).get('assetId') == playlist_id:
                medias = [media]
                break
        # Fallback to the whole playlist
        if not medias:
            medias = playlist_medias

    entries = []
    for num, media_meta in enumerate(medias, start=1):
        formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
        if not formats and not self.get_param('ignore_no_formats'):
            continue
        self._sort_formats(formats)

        video_id = media_meta.get('externalId')
        if not video_id:
            # Synthesize an id; number it only when there are several medias
            video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)

        title = media_meta.get('caption')
        if not title:
            title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)

        duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))

        images = []
        for image in media_meta.get('images', {}).values():
            images.extend(image.values())
        if 'image' in media_meta:
            images.append(media_meta['image'])

        thumbnails = [{
            'url': image.get('href'),
            'width': int_or_none(image.get('width')),
            'height': int_or_none(image.get('height')),
        } for image in images]

        entries.append({
            'id': video_id,
            'title': title,
            'thumbnails': thumbnails,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
            'subtitles': subtitles,
        })

    return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
a65402ef
YCH
1344
1345
class BBCCoUkArticleIE(InfoExtractor):
    # Extractor for bbc.co.uk/programmes/articles/<id> pages: every clip
    # referenced by the article becomes one playlist entry.
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of the clips linked from the article at *url*.

        The playlist title and description are taken from the page's
        OpenGraph metadata; each ``typeof="Clip"`` element contributes its
        ``resource`` URL as a ``url_result`` entry.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # The description is optional metadata: strip it only when present
        # rather than calling .strip() on a missing (None) value.
        description = strip_or_none(self._og_search_description(webpage))

        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
1374
1375
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    # Shared logic for paginated bbc.co.uk listing pages.  Subclasses are
    # expected to provide _VIDEO_ID_TEMPLATE, _URL_TEMPLATE and
    # _extract_title_and_description(), all of which are used below.

    def _entries(self, webpage, url, playlist_id):
        """Yield a ``url_result`` for every video id found on the listing.

        When *url* carries an explicit ``page`` query parameter only the
        already downloaded *webpage* is scanned; otherwise the "next" link
        of the pagination is followed until no such link is found.
        """
        query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        only_requested_page = 'page' in query
        video_id_re = self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX

        # The initial webpage is page 1, so follow-up downloads start at 2.
        page_num = 2
        while True:
            for video_id in re.findall(video_id_re, webpage):
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())

            if only_requested_page:
                return

            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page:
                return

            webpage = self._download_webpage(
                compat_urlparse.urljoin(url, next_page), playlist_id,
                'Downloading page %d' % page_num, page_num)
            page_num += 1

    def _real_extract(self, url):
        """Download the listing page and wrap its entries in a playlist."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title, description = self._extract_title_and_description(webpage)
        entries = self._entries(webpage, url, playlist_id)
        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
1406
1407
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    # Base for iPlayer listing extractors.  Subclasses fill in the URL
    # template below and supply _PAGE_SIZE, _DESCRIPTION_KEY and the
    # _call_api/_get_* hooks used by the methods of this class.
    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Return ``episode[key][default_key]`` via ``try_get``."""
        def lookup(data):
            return data[key][default_key]
        return try_get(episode, lookup)

    def _get_description(self, data):
        """Pick the first available of the large/medium/small synopsis texts."""
        synopsis_by_size = data.get(self._DESCRIPTION_KEY) or {}
        return dict_get(synopsis_by_size, ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield url-type entries for one zero-based *page* of episodes.

        The API is called with ``page + 1``; elements whose episode has no
        id are skipped.
        """
        api_response = self._call_api(
            programme_id, per_page, page + 1, series_id)
        for element in self._get_elements(api_response):
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                continue

            image = self._get_episode_image(episode)
            thumbnail = image.replace('{recipe}', 'raw') if image else None
            category = self._get_default(episode, 'labels', 'category')

            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': thumbnail,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the playlist for an iPlayer listing URL.

        An explicit ``page`` query parameter restricts the result to that
        single page; without it all pages are exposed lazily through an
        ``OnDemandPagedList``.  ``seriesId`` is forwarded to the API call.
        """
        pid = self._match_id(url)
        query = parse_qs(url)
        series_id = query.get('seriesId', [None])[0]
        page = query.get('page', [None])[0]

        if page:
            # Explicit page: fixed page size of 36 (presumably what the
            # website itself uses -- TODO confirm), URL pages are 1-based.
            fetch_page = functools.partial(self._fetch_page, pid, 36, series_id)
            entries = fetch_page(int(page) - 1)
        else:
            fetch_page = functools.partial(
                self._fetch_page, pid, self._PAGE_SIZE, series_id)
            entries = OnDemandPagedList(fetch_page, self._PAGE_SIZE)

        # A separate one-item request supplies the playlist level metadata.
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        playlist_title = self._get_playlist_title(playlist_data)
        playlist_description = self._get_description(playlist_data)
        return self.playlist_result(
            entries, pid, playlist_title, playlist_description)
1456
1457
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "episodes" listings, served by the graph.ibl.api.bbc.co.uk
    # endpoint called in _call_api below.
    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the episode's ``image['default']`` value, if any."""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the ``'default'`` variant stored under *field*."""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the list of result elements of a programme response."""
        results = data['entities']['results']
        return results

    @staticmethod
    def _get_episode(element):
        """Return the episode dict of *element*, or ``{}`` when absent."""
        episode = element.get('episode')
        return episode if episode else {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST a query for programme *pid* and return ``data.programme``.

        ``sliceId`` is only sent when *series_id* is truthy.
        """
        variables = {'id': pid, 'page': page, 'perPage': per_page}
        if series_id:
            variables['sliceId'] = series_id

        # NOTE(review): the fixed 'id' looks like a stored-query identifier
        # expected by the endpoint -- confirm before changing it.
        request_body = json.dumps({
            'id': '5692d93d5aac8d796a0305e895e61551',
            'variables': variables,
        }).encode('utf-8')
        response = self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid,
            headers={'Content-Type': 'application/json'},
            data=request_body)
        return response['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """The programme response already is the playlist data."""
        return data

    def _get_playlist_title(self, data):
        """Return the playlist's ``title['default']`` value, if any."""
        return self._get_default(data, 'title')
1545
1546
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "group" listings, served by the ibl.api.bbc.co.uk group
    # episodes endpoint called in _call_api below.
    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the episode's ``images['standard']`` value, if any."""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Fields are stored flat on group episodes; read *field* directly."""
        value = episode.get(field)
        return value

    @staticmethod
    def _get_elements(data):
        """Return the list stored under ``elements`` of a group response."""
        elements = data['elements']
        return elements

    @staticmethod
    def _get_episode(element):
        """Each element already is the episode dict."""
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of group *pid* and return ``group_episodes``.

        *series_id* is accepted for signature compatibility with the other
        listing extractor but is not used by this request.
        """
        paging = {
            'page': page,
            'per_page': per_page,
        }
        response = self._download_json(
            'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
            pid, query=paging)
        return response['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Return the ``group`` member holding the playlist metadata."""
        group = data['group']
        return group

    def _get_playlist_title(self, data):
        """Return the plain ``title`` value of the group data."""
        title = data.get('title')
        return title
ded7511a
S
1609
1610
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    # Listing pages (episodes/broadcasts/clips) of a bbc.co.uk programme.
    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    # Target URL built for every programme id found on a listing page
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    # Regex template locating programme ids; %s receives the id pattern
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return the ``(title, description)`` pair from OpenGraph metadata.

        The title lookup is explicitly non-fatal.
        """
        return (
            self._og_search_title(webpage, fatal=False),
            self._og_search_description(webpage),
        )