]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/bbc.py
[youjizz] Relax _VALID_URL (Closes #10131)
[yt-dlp.git] / youtube_dl / extractor / bbc.py
CommitLineData
9afa1770 1# coding: utf-8
082c6c86
S
2from __future__ import unicode_literals
3
9afa1770 4import re
082c6c86 5
f13b1e7d 6from .common import InfoExtractor
8683b4d8
S
7from ..utils import (
8 ExtractorError,
9afa1770 9 float_or_none,
8683b4d8 10 int_or_none,
9afa1770
S
11 parse_duration,
12 parse_iso8601,
dab062fb 13 unescapeHTML,
8683b4d8 14)
36e6f62c
JMF
15from ..compat import (
16 compat_etree_fromstring,
17 compat_HTTPError,
18)
082c6c86 19
d12a1a47 20
f13b1e7d 21class BBCCoUkIE(InfoExtractor):
082c6c86 22 IE_NAME = 'bbc.co.uk'
2e3fd9ec 23 IE_DESC = 'BBC iPlayer'
22d7368d 24 _ID_REGEX = r'[pb][\da-z]{7}'
f20a11ed
S
25 _VALID_URL = r'''(?x)
26 https?://
27 (?:www\.)?bbc\.co\.uk/
28 (?:
29 programmes/(?!articles/)|
30 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
31 music/clips[/#]|
32 radio/player/
33 )
ded7511a 34 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
f20a11ed 35 ''' % _ID_REGEX
082c6c86 36
d12a1a47 37 _MEDIASELECTOR_URLS = [
26ccc68b
S
38 # Provides HQ HLS streams with even better quality than the pc mediaset, but fails
39 # with geolocation in some cases even when the content is not geo restricted at all (e.g.
d781e293 40 # http://www.bbc.co.uk/programmes/b06bp7lf). May also fail with selectionunavailable.
d1c694ea 41 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
d12a1a47
S
42 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
43 ]
a8b081a0 44
e6174ee9
S
45 _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
46 _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
47
48 _NAMESPACES = (
49 _MEDIASELECTION_NS,
50 _EMP_PLAYLIST_NS,
51 )
52
2e3fd9ec
S
53 _TESTS = [
54 {
f2d0fc68 55 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
2e3fd9ec 56 'info_dict': {
f2d0fc68 57 'id': 'b039d07m',
b1ea6802 58 'ext': 'flv',
679bacf0 59 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
c4914185 60 'description': 'The Canadian poet and songwriter reflects on his musical career.',
2e3fd9ec
S
61 },
62 'params': {
b1ea6802 63 # rtmp download
2e3fd9ec
S
64 'skip_download': True,
65 }
082c6c86 66 },
2e3fd9ec
S
67 {
68 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
69 'info_dict': {
70 'id': 'b00yng1d',
71 'ext': 'flv',
72 'title': 'The Man in Black: Series 3: The Printed Name',
73 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
74 'duration': 1800,
75 },
76 'params': {
77 # rtmp download
78 'skip_download': True,
c7f0177f
S
79 },
80 'skip': 'Episode is no longer available on BBC iPlayer Radio',
2e3fd9ec
S
81 },
82 {
83 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
84 'info_dict': {
85 'id': 'b00yng1d',
86 'ext': 'flv',
17968e44 87 'title': 'The Voice UK: Series 3: Blind Auditions 5',
611c1dd9 88 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
17968e44 89 'duration': 5100,
2e3fd9ec
S
90 },
91 'params': {
92 # rtmp download
93 'skip_download': True,
94 },
b1ea6802 95 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
c056efa2
S
96 },
97 {
98 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
99 'info_dict': {
100 'id': 'b03k3pb7',
101 'ext': 'flv',
102 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
103 'description': '2. Invasion',
104 'duration': 3600,
105 },
106 'params': {
107 # rtmp download
108 'skip_download': True,
109 },
b1ea6802 110 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
ae6986fb
S
111 }, {
112 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
113 'info_dict': {
114 'id': 'b04v209v',
115 'ext': 'flv',
116 'title': 'Pete Tong, The Essential New Tune Special',
117 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
118 'duration': 10800,
119 },
120 'params': {
121 # rtmp download
122 'skip_download': True,
a3ef0e1c
YCH
123 },
124 'skip': 'Episode is no longer available on BBC iPlayer Radio',
c7e67594 125 }, {
5aa535c3 126 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
c7e67594
S
127 'note': 'Audio',
128 'info_dict': {
5aa535c3 129 'id': 'p022h44j',
b1ea6802 130 'ext': 'flv',
5aa535c3
S
131 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
132 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
133 'duration': 227,
c7e67594
S
134 },
135 'params': {
b1ea6802 136 # rtmp download
c7e67594
S
137 'skip_download': True,
138 }
139 }, {
140 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
141 'note': 'Video',
142 'info_dict': {
143 'id': 'p025c103',
b1ea6802 144 'ext': 'flv',
c7e67594
S
145 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
146 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
147 'duration': 226,
148 },
149 'params': {
b1ea6802 150 # rtmp download
c7e67594
S
151 'skip_download': True,
152 }
e68ae99a
S
153 }, {
154 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
155 'info_dict': {
156 'id': 'p02n76xf',
157 'ext': 'flv',
158 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
159 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
160 'duration': 3540,
161 },
162 'params': {
163 # rtmp download
164 'skip_download': True,
165 },
b1ea6802 166 'skip': 'geolocation',
25fa8d66
YCH
167 }, {
168 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
169 'info_dict': {
170 'id': 'b05zmgw1',
171 'ext': 'flv',
172 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
173 'title': 'Royal Academy Summer Exhibition',
174 'duration': 3540,
175 },
176 'params': {
177 # rtmp download
178 'skip_download': True,
179 },
b1ea6802 180 'skip': 'geolocation',
54914380
S
181 }, {
182 # iptv-all mediaset fails with geolocation however there is no geo restriction
183 # for this programme at all
5aa535c3 184 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
54914380 185 'info_dict': {
5aa535c3 186 'id': 'b06rkms3',
54914380 187 'ext': 'flv',
5aa535c3
S
188 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
189 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
54914380
S
190 },
191 'params': {
192 # rtmp download
193 'skip_download': True,
194 },
b1ea6802 195 'skip': 'Now it\'s really geo-restricted',
1ac6e794
S
196 }, {
197 # compact player (https://github.com/rg3/youtube-dl/issues/8147)
198 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
199 'info_dict': {
200 'id': 'p028bfkj',
b1ea6802 201 'ext': 'flv',
1ac6e794
S
202 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
203 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
204 },
205 'params': {
b1ea6802 206 # rtmp download
1ac6e794
S
207 'skip_download': True,
208 },
31763975
S
209 }, {
210 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
211 'only_matching': True,
c7e67594
S
212 }, {
213 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
214 'only_matching': True,
0692ef86
S
215 }, {
216 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
217 'only_matching': True,
f20a11ed
S
218 }, {
219 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
220 'only_matching': True,
ae6986fb 221 }
2e3fd9ec
S
222 ]
223
d12a1a47
S
class MediaSelectionError(Exception):
    """Signals that a media selection document contained an ``error`` element.

    The ``id`` attribute of that element is stored on ``self.id``; callers
    compare it against known identifiers such as ``'geolocation'``.
    """

    def __init__(self, id):
        # Initialise the base class with the identifier so that str(), repr()
        # and tracebacks show it instead of an empty message.
        Exception.__init__(self, id)
        self.id = id
227
2e3fd9ec
S
def _extract_asx_playlist(self, connection, programme_id):
    """Download the ASX playlist a connection points at and list its targets.

    The playlist URL is the connection's ``href``; the result is the ``href``
    of every ``./Entry/ref`` element of the downloaded document, in order.
    """
    playlist_url = connection.get('href')
    asx_doc = self._download_xml(playlist_url, programme_id, 'Downloading ASX playlist')
    targets = []
    for ref in asx_doc.findall('./Entry/ref'):
        targets.append(ref.get('href'))
    return targets
231
def _extract_connection(self, connection, programme_id):
    """Convert one ``connection`` entry into a list of format dicts.

    * ``rtmp``: a single flv format built from the server, application,
      auth string and identifier of the connection.
    * ``http``: one format per ASX reference when the supplier is ``asx``,
      nothing for DASH (not supported yet), the HLS variants for ``hls``,
      otherwise a single direct link.
    * any other protocol: an empty list.
    """
    protocol = connection.get('protocol')
    supplier = connection.get('supplier')
    kind = connection.get('kind')

    if protocol == 'rtmp':
        application = connection.get('application', 'ondemand')
        auth_string = connection.get('authString')
        identifier = connection.get('identifier')
        server = connection.get('server')
        return [{
            'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
            'play_path': identifier,
            'app': '%s?%s' % (application, auth_string),
            'page_url': 'http://www.bbc.co.uk',
            'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
            'rtmp_live': False,
            'ext': 'flv',
            'format_id': supplier,
        }]

    if protocol != 'http':
        return []

    href = connection.get('href')
    transfer_format = connection.get('transferFormat')

    # ASX playlist
    if supplier == 'asx':
        return [{
            'url': ref,
            'format_id': 'ref%s_%s' % (index, supplier),
        } for index, ref in enumerate(
            self._extract_asx_playlist(connection, programme_id))]

    # Skip DASH until supported
    if transfer_format == 'dash':
        return []

    if transfer_format == 'hls':
        return list(self._extract_m3u8_formats(
            href, programme_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id=supplier, fatal=False))

    # Direct link
    return [{
        'url': href,
        'format_id': supplier or kind or protocol,
    }]
276
def _extract_items(self, playlist):
    """Return the direct ``item`` children of *playlist* that live in the
    EMP playlist namespace."""
    item_path = './{%s}item' % self._EMP_PLAYLIST_NS
    return playlist.findall(item_path)
279
def _findall_ns(self, element, xpath):
    """Run ``element.findall`` once per known namespace and merge the hits.

    *xpath* is a ``%``-template with a single placeholder that receives each
    entry of ``self._NAMESPACES`` in turn; matches are returned grouped by
    namespace, in that order.
    """
    return [
        match
        for namespace in self._NAMESPACES
        for match in element.findall(xpath % namespace)
    ]
2e3fd9ec
S
285
def _extract_medias(self, media_selection):
    """Return the ``media`` children of *media_selection*.

    An ``error`` child is looked up first in the media selection namespace
    and then in the EMP playlist namespace.

    Raises:
        BBCCoUkIE.MediaSelectionError: if an ``error`` element is present;
            it carries the element's ``id`` attribute.
    """
    error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
    if error is None:
        # The result of this fallback lookup used to be thrown away, so
        # errors reported in the EMP playlist namespace went unnoticed.
        error = media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
    if error is not None:
        raise BBCCoUkIE.MediaSelectionError(error.get('id'))
    return self._findall_ns(media_selection, './{%s}media')
2e3fd9ec
S
293
def _extract_connections(self, media):
    """Return the ``connection`` children of *media* across all namespaces
    handled by ``_findall_ns``."""
    connection_xpath = './{%s}connection'
    return self._findall_ns(media, connection_xpath)
2e3fd9ec
S
296
def _extract_video(self, media, programme_id):
    """Collect the formats of every connection of a video ``media`` entry.

    Each format gets the width, height, bitrate (as ``vbr``), encoding (as
    ``vcodec``) and file size declared on *media*. When *media* names a
    ``service`` it is prepended to the format's ``format_id``.
    """
    service = media.get('service')
    shared_fields = {
        'width': int_or_none(media.get('width')),
        'height': int_or_none(media.get('height')),
        'vbr': int_or_none(media.get('bitrate')),
        'vcodec': media.get('encoding'),
        'filesize': int_or_none(media.get('media_file_size')),
    }
    video_formats = []
    for connection in self._extract_connections(media):
        for fmt in self._extract_connection(connection, programme_id):
            fmt.update(shared_fields)
            if service:
                fmt['format_id'] = '%s_%s' % (service, fmt['format_id'])
            video_formats.append(fmt)
    return video_formats
319
def _extract_audio(self, media, programme_id):
    """Collect the formats of every connection of an audio ``media`` entry.

    Each format gets the bitrate (as ``abr``) and encoding (as ``acodec``)
    declared on *media* and is marked as having no video (``vcodec`` is
    ``'none'``). When *media* names a ``service`` it is prepended to the
    format's ``format_id``.
    """
    service = media.get('service')
    shared_fields = {
        'abr': int_or_none(media.get('bitrate')),
        'acodec': media.get('encoding'),
        'vcodec': 'none',
    }
    audio_formats = []
    for connection in self._extract_connections(media):
        for fmt in self._extract_connection(connection, programme_id):
            fmt.update(shared_fields)
            # Only prefix when a service is present, as _extract_video does;
            # an unconditional prefix yields ids like "None_<supplier>".
            if service:
                fmt['format_id'] = '%s_%s' % (service, fmt['format_id'])
            audio_formats.append(fmt)
    return audio_formats
336
def _get_subtitles(self, media, programme_id):
    """Build the subtitles mapping for a captions ``media`` entry.

    Every connection's captions document is downloaded to read its
    ``xml:lang`` attribute (``'en'`` when absent); the mapping then points
    that language at the connection URL as a single ``ttml`` track. A later
    connection with the same language replaces an earlier one.
    """
    subtitles = {}
    for connection in self._extract_connections(media):
        captions_url = connection.get('href')
        captions = self._download_xml(captions_url, programme_id, 'Downloading captions')
        lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
        subtitles[lang] = [{
            'url': captions_url,
            'ext': 'ttml',
        }]
    return subtitles
082c6c86 349
d12a1a47
S
def _raise_extractor_error(self, media_selection_error):
    """Re-raise a media selection error as an expected ``ExtractorError``
    whose message names this extractor and the error identifier."""
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    raise ExtractorError(message, expected=True)
354
def _download_media_selector(self, programme_id):
    """Try each media selector URL in turn and return the first result.

    A selector that fails with ``notukerror``, ``geolocation`` or
    ``selectionunavailable`` is skipped in favour of the next one; any other
    media selection error is reported immediately. When every selector was
    skipped, the last error seen is reported.
    """
    retryable_ids = ('notukerror', 'geolocation', 'selectionunavailable')
    last_error = None
    for url_template in self._MEDIASELECTOR_URLS:
        selector_url = url_template % programme_id
        try:
            return self._download_media_selector_url(selector_url, programme_id)
        except BBCCoUkIE.MediaSelectionError as error:
            if error.id not in retryable_ids:
                self._raise_extractor_error(error)
                continue
            last_error = error
    self._raise_extractor_error(last_error)
9afa1770
S
367
def _download_media_selector_url(self, url, programme_id=None):
    """Download one media selection document and process it.

    When the download fails with HTTP 403 or 404, the body of the error
    response is parsed as the media selection document instead; any other
    failure propagates unchanged.
    """
    try:
        media_selection = self._download_xml(
            url, programme_id, 'Downloading media selection XML')
    except ExtractorError as ee:
        http_error = ee.cause
        usable_body = (
            isinstance(http_error, compat_HTTPError)
            and http_error.code in (403, 404))
        if not usable_body:
            raise
        media_selection = compat_etree_fromstring(http_error.read().decode('utf-8'))
    return self._process_media_selector(media_selection, programme_id)
082c6c86 378
def _process_media_selector(self, media_selection, programme_id):
    """Split a media selection document into formats and subtitles.

    Audio and video medias contribute formats, a captions media supplies the
    subtitles (the last one wins) and every other kind is ignored.

    Returns:
        A ``(formats, subtitles)`` tuple; ``subtitles`` is ``None`` when no
        captions media was found.
    """
    format_extractors = {
        'audio': self._extract_audio,
        'video': self._extract_video,
    }
    formats, subtitles = [], None

    for media in self._extract_medias(media_selection):
        kind = media.get('kind')
        if kind == 'captions':
            subtitles = self.extract_subtitles(media, programme_id)
            continue
        extract = format_extractors.get(kind)
        if extract is not None:
            formats.extend(extract(media, programme_id))
    return formats, subtitles
2e3fd9ec 392
ae6986fb
S
def _download_playlist(self, playlist_id):
    """Resolve a playlist id through the JSON playlist, else the legacy one.

    The first ``programme``/``radioProgramme`` item of the default available
    version supplies the vpid and duration, and its formats are fetched from
    the media selector. The legacy XML playlist is used instead when the
    JSON route hits an HTTP 404 or yields no such item.

    Returns:
        ``(programme_id, title, description, duration, formats, subtitles)``
    """
    try:
        playlist = self._download_json(
            'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
            playlist_id, 'Downloading playlist JSON')

        version = playlist.get('defaultAvailableVersion')
        if version:
            smp_config = version['smpConfig']
            title = smp_config['title']
            description = smp_config['summary']
            for item in smp_config['items']:
                if item['kind'] not in ('programme', 'radioProgramme'):
                    continue
                programme_id = item.get('vpid')
                duration = int_or_none(item.get('duration'))
                formats, subtitles = self._download_media_selector(programme_id)
                return programme_id, title, description, duration, formats, subtitles
    except ExtractorError as ee:
        not_found = isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404
        if not not_found:
            raise

    # fallback to legacy playlist
    return self._process_legacy_playlist(playlist_id)
418
def _process_legacy_playlist_url(self, url, display_id):
    """Download the legacy playlist at *url* and extract its media info."""
    return self._extract_from_legacy_playlist(
        self._download_legacy_playlist_url(url, display_id), display_id)
422
def _process_legacy_playlist(self, playlist_id):
    """Process the legacy iPlayer playlist that belongs to *playlist_id*."""
    legacy_url = 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id
    return self._process_legacy_playlist_url(legacy_url, playlist_id)
426
def _download_legacy_playlist_url(self, url, playlist_id=None):
    """Fetch the legacy playlist XML document located at *url*."""
    note = 'Downloading legacy playlist XML'
    return self._download_xml(url, playlist_id, note)
ae6986fb 430
def _extract_from_legacy_playlist(self, playlist, playlist_id):
    """Extract media info from a legacy (EMP) playlist document.

    Only the first ``programme``/``radioProgramme`` item is used. Its
    programme id is taken from the item's own ``identifier``/``group``
    attributes, falling back to those of its ``mediator`` child. With an id
    the formats come from the media selector; without one the item itself is
    processed as a media selection and *playlist_id* is used as the id.

    Returns:
        ``(programme_id, title, description, duration, formats, subtitles)``,
        or ``None`` (implicitly) when the playlist has no matching item.

    Raises:
        ExtractorError: if the playlist carries a ``noItems`` element; the
            message depends on its ``reason`` attribute.
    """
    no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % playlist_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % playlist_id
        elif reason == 'noMedia':
            msg = 'Episode %s is not currently available' % playlist_id
        else:
            msg = 'Episode %s is not available: %s' % (playlist_id, reason)
        raise ExtractorError(msg, expected=True)

    def get_from_attributes(element):
        # First attribute that looks like a BBC pid/vpid, else None.
        for attribute in ('identifier', 'group'):
            value = element.get(attribute)
            if value and re.match(r'^[pb][\da-z]{7}$', value):
                return value

    def get_programme_id(item):
        # The item's own attributes take precedence. This lookup used to be
        # performed and then discarded, so only the mediator was consulted.
        programme_id = get_from_attributes(item)
        if programme_id:
            return programme_id
        mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
        if mediator is not None:
            return get_from_attributes(mediator)

    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind != 'programme' and kind != 'radioProgramme':
            continue
        title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
        description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
        description = description_el.text if description_el is not None else None

        programme_id = get_programme_id(item)
        duration = int_or_none(item.get('duration'))

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
        else:
            # No usable id: the item embeds its media selection directly.
            formats, subtitles = self._process_media_selector(item, playlist_id)
            programme_id = playlist_id

        return programme_id, title, description, duration, formats, subtitles
474
c056efa2
S
def _real_extract(self, url):
    """Extract a single programme from a bbc.co.uk programme/iPlayer page.

    The vpid is taken from the ``mediator.bind(...)`` player JSON or from a
    ``"vpid"`` entry in the page. With a vpid, formats come from the media
    selector and title/description from the page; without one everything is
    resolved through the playlist of the id matched in the URL.
    """
    group_id = self._match_id(url)

    webpage = self._download_webpage(url, group_id, 'Downloading video page')

    programme_id = None
    duration = None

    tviplayer = self._search_regex(
        r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
        webpage, 'player', default=None)

    if tviplayer:
        player = self._parse_json(tviplayer, group_id).get('player', {})
        duration = int_or_none(player.get('duration'))
        programme_id = player.get('vpid')

    if not programme_id:
        vpid_pattern = r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX
        programme_id = self._search_regex(
            vpid_pattern, webpage, 'vpid', fatal=False, default=None)

    if not programme_id:
        (programme_id, title, description, duration,
         formats, subtitles) = self._download_playlist(group_id)
    else:
        formats, subtitles = self._download_media_selector(programme_id)
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
                 r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
        description = self._search_regex(
            (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
             r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
            webpage, 'description', default=None) or self._html_search_meta(
            'description', webpage)

    self._sort_formats(formats)

    return {
        'id': programme_id,
        'title': title,
        'description': description,
        'thumbnail': self._og_search_thumbnail(webpage, default=None),
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }
10273d6e 521
522
9afa1770
S
523class BBCIE(BBCCoUkIE):
524 IE_NAME = 'bbc'
525 IE_DESC = 'BBC'
526 _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
10273d6e 527
d12a1a47 528 _MEDIASELECTOR_URLS = [
55ebae26
S
529 # Provides HQ HLS streams but fails with geolocation in some cases when it's
530 # even not geo restricted at all
531 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
d12a1a47
S
532 # Provides more formats, namely direct mp4 links, but fails on some videos with
533 # notukerror for non UK (?) users (e.g.
534 # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
535 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
536 # Provides fewer formats, but works everywhere for everybody (hopefully)
537 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
538 ]
10273d6e 539
540 _TESTS = [{
6a747190 541 # article with multiple videos embedded with data-playable containing vpids
10273d6e 542 'url': 'http://www.bbc.com/news/world-europe-32668511',
543 'info_dict': {
544 'id': 'world-europe-32668511',
545 'title': 'Russia stages massive WW2 parade despite Western boycott',
9afa1770 546 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
10273d6e 547 },
548 'playlist_count': 2,
a3bfddfa 549 }, {
6a747190 550 # article with multiple videos embedded with data-playable (more videos)
10273d6e 551 'url': 'http://www.bbc.com/news/business-28299555',
552 'info_dict': {
553 'id': 'business-28299555',
554 'title': 'Farnborough Airshow: Video highlights',
9afa1770 555 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
10273d6e 556 },
557 'playlist_count': 9,
9afa1770 558 'skip': 'Save time',
88ed52ae
S
559 }, {
560 # article with multiple videos embedded with `new SMP()`
6a747190 561 # broken
88ed52ae
S
562 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
563 'info_dict': {
564 'id': '3662a707-0af9-3149-963f-47bea720b460',
b7d7674f 565 'title': 'BUGGER',
88ed52ae
S
566 },
567 'playlist_count': 18,
a3bfddfa 568 }, {
6a747190 569 # single video embedded with data-playable containing vpid
10273d6e 570 'url': 'http://www.bbc.com/news/world-europe-32041533',
10273d6e 571 'info_dict': {
572 'id': 'p02mprgb',
55ebae26 573 'ext': 'mp4',
10273d6e 574 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
55ebae26 575 'description': 'md5:2868290467291b37feda7863f7a83f54',
10273d6e 576 'duration': 47,
9afa1770 577 'timestamp': 1427219242,
da92eeae 578 'upload_date': '20150324',
10273d6e 579 },
580 'params': {
9afa1770 581 # rtmp download
10273d6e 582 'skip_download': True,
583 }
a3bfddfa 584 }, {
6a747190
S
585 # article with single video embedded with data-playable containing XML playlist
586 # with direct video links as progressiveDownloadUrl (for now these are extracted)
587 # and playlist with f4m and m3u8 as streamingUrl
de939d89 588 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
de939d89 589 'info_dict': {
9afa1770 590 'id': '150615_telabyad_kentin_cogu',
de939d89 591 'ext': 'mp4',
05087d1b
S
592 'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
593 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
9afa1770 594 'timestamp': 1434397334,
da92eeae 595 'upload_date': '20150615',
de939d89 596 },
597 'params': {
598 'skip_download': True,
599 }
c936d8cc 600 }, {
6a747190 601 # single video embedded with data-playable containing XML playlists (regional section)
de939d89 602 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 603 'info_dict': {
9afa1770 604 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 605 'ext': 'mp4',
9afa1770 606 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
05087d1b 607 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
9afa1770 608 'timestamp': 1434713142,
da92eeae 609 'upload_date': '20150619',
de939d89 610 },
611 'params': {
612 'skip_download': True,
613 }
a346b1ff
S
614 }, {
615 # single video from video playlist embedded with vxp-playlist-data JSON
616 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
617 'info_dict': {
618 'id': 'p02w6qjc',
55ebae26 619 'ext': 'mp4',
a346b1ff
S
620 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
621 'duration': 56,
0bc4ee60 622 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
a346b1ff
S
623 },
624 'params': {
625 'skip_download': True,
626 }
9afa1770
S
627 }, {
628 # single video story with digitalData
629 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
630 'info_dict': {
631 'id': 'p02q6gc4',
632 'ext': 'flv',
633 'title': 'Sri Lanka’s spicy secret',
634 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
635 'timestamp': 1437674293,
636 'upload_date': '20150723',
637 },
638 'params': {
639 # rtmp download
640 'skip_download': True,
641 }
642 }, {
643 # single video story without digitalData
644 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
645 'info_dict': {
646 'id': 'p018zqqg',
55ebae26 647 'ext': 'mp4',
9afa1770
S
648 'title': 'Hyundai Santa Fe Sport: Rock star',
649 'description': 'md5:b042a26142c4154a6e472933cf20793d',
ae8bdfd1
S
650 'timestamp': 1415867444,
651 'upload_date': '20141113',
9afa1770
S
652 },
653 'params': {
654 # rtmp download
655 'skip_download': True,
656 }
657 }, {
6a747190 658 # single video with playlist.sxml URL in playlist param
9afa1770
S
659 'url': 'http://www.bbc.com/sport/0/football/33653409',
660 'info_dict': {
661 'id': 'p02xycnp',
55ebae26 662 'ext': 'mp4',
9afa1770 663 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
7033bc1a 664 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
9afa1770
S
665 'duration': 140,
666 },
667 'params': {
668 # rtmp download
669 'skip_download': True,
670 }
b5d48cb1 671 }, {
6a747190 672 # article with multiple videos embedded with playlist.sxml in playlist param
b5d48cb1
S
673 'url': 'http://www.bbc.com/sport/0/football/34475836',
674 'info_dict': {
675 'id': '34475836',
450b233c 676 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
8c65e4a5 677 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
b5d48cb1
S
678 },
679 'playlist_count': 3,
450b233c
S
680 }, {
681 # school report article with single video
682 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
683 'info_dict': {
684 'id': '35744779',
685 'title': 'School which breaks down barriers in Jerusalem',
686 },
687 'playlist_count': 1,
9afa1770
S
688 }, {
689 # single video with playlist URL from weather section
690 'url': 'http://www.bbc.com/weather/features/33601775',
691 'only_matching': True,
692 }, {
693 # custom redirection to www.bbc.com
694 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
695 'only_matching': True,
a1cf3e38
S
696 }, {
697 # single video article embedded with data-media-vpid
698 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
699 'only_matching': True,
10273d6e 700 }]
701
9afa1770
S
@classmethod
def suitable(cls, url):
    """Claim *url* only when none of the more specific BBC extractors does."""
    more_specific = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
    for extractor in more_specific:
        if extractor.suitable(url):
            return False
    return super(BBCIE, cls).suitable(url)
9afa1770
S
707
def _extract_from_media_meta(self, media_meta, video_id):
    """Derive ``(formats, subtitles)`` from a media metadata dict.

    Sources are tried in this order: direct links under ``sourceFiles``
    (returned with an empty subtitles list), the media selector for
    ``externalId``, then the legacy playlist behind ``href``. Two empty
    lists are returned when none of them is present.
    """
    # Direct links to media in media metadata (e.g.
    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
    # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
    source_files = media_meta.get('sourceFiles')
    if source_files:
        direct_formats = []
        for format_id, source in source_files.items():
            if not source.get('url'):
                continue
            direct_formats.append({
                'url': source['url'],
                'format_id': format_id,
                'ext': source.get('encoding'),
                'tbr': float_or_none(source.get('bitrate'), 1000),
                'filesize': int_or_none(source.get('filesize')),
            })
        return direct_formats, []

    programme_id = media_meta.get('externalId')
    if programme_id:
        return self._download_media_selector(programme_id)

    # Process playlist.sxml as legacy playlist
    href = media_meta.get('href')
    if not href:
        return [], []
    playlist = self._download_legacy_playlist_url(href)
    extracted = self._extract_from_legacy_playlist(playlist, video_id)
    _, _, _, _, formats, subtitles = extracted
    return formats, subtitles
734
baf39a1a
S
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
    """Build an info dict from the legacy playlist (playlist.sxml) at *url*.

    The formats are sorted in place and *timestamp* is passed through
    unchanged into the result.
    """
    legacy_info = self._process_legacy_playlist_url(url, playlist_id)
    programme_id, title, description, duration, formats, subtitles = legacy_info
    self._sort_formats(formats)
    info = {
        'id': programme_id,
        'title': title,
        'description': description,
        'duration': duration,
        'timestamp': timestamp,
        'formats': formats,
        'subtitles': subtitles,
    }
    return info
748
10273d6e 749 def _real_extract(self, url):
9afa1770
S
750 playlist_id = self._match_id(url)
751
752 webpage = self._download_webpage(url, playlist_id)
753
350e02d4
YCH
754 json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
755 timestamp = json_ld_info.get('timestamp')
0e832c2c 756
350e02d4 757 playlist_title = json_ld_info.get('title')
0e832c2c
S
758 if not playlist_title:
759 playlist_title = self._og_search_title(
760 webpage, default=None) or self._html_search_regex(
761 r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
762 if playlist_title:
763 playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
764
765 playlist_description = json_ld_info.get(
766 'description') or self._og_search_description(webpage, default=None)
ae8bdfd1
S
767
768 if not timestamp:
769 timestamp = parse_iso8601(self._search_regex(
770 [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
771 r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
6f789365 772 r'"datePublished":\s*"([^"]+)'],
ae8bdfd1 773 webpage, 'date', default=None))
9afa1770 774
78f9d843
S
775 entries = []
776
de665713
S
777 # article with multiple videos embedded with playlist.sxml (e.g.
778 # http://www.bbc.com/sport/0/football/34475836)
779 playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
222e11d4 780 playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
de665713 781 if playlists:
baf39a1a
S
782 entries = [
783 self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
784 for playlist_url in playlists]
de939d89 785
78f9d843
S
786 # news article with multiple videos embedded with data-playable
787 data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
788 if data_playables:
789 for _, data_playable_json in data_playables:
790 data_playable = self._parse_json(
791 unescapeHTML(data_playable_json), playlist_id, fatal=False)
792 if not data_playable:
793 continue
baf39a1a
S
794 settings = data_playable.get('settings', {})
795 if settings:
78f9d843
S
796 # data-playable with video vpid in settings.playlistObject.items (e.g.
797 # http://www.bbc.com/news/world-us-canada-34473351)
baf39a1a
S
798 playlist_object = settings.get('playlistObject', {})
799 if playlist_object:
800 items = playlist_object.get('items')
801 if items and isinstance(items, list):
78f9d843
S
802 title = playlist_object['title']
803 description = playlist_object.get('summary')
baf39a1a
S
804 duration = int_or_none(items[0].get('duration'))
805 programme_id = items[0].get('vpid')
78f9d843
S
806 formats, subtitles = self._download_media_selector(programme_id)
807 self._sort_formats(formats)
808 entries.append({
809 'id': programme_id,
810 'title': title,
811 'description': description,
812 'timestamp': timestamp,
813 'duration': duration,
814 'formats': formats,
815 'subtitles': subtitles,
816 })
817 else:
818 # data-playable without vpid but with playlist.sxml URLs
819 # in otherSettings.playlist (e.g.
820 # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
821 playlist = data_playable.get('otherSettings', {}).get('playlist', {})
822 if playlist:
05087d1b
S
823 for key in ('progressiveDownload', 'streaming'):
824 playlist_url = playlist.get('%sUrl' % key)
825 if not playlist_url:
826 continue
827 try:
828 entries.append(self._extract_from_playlist_sxml(
829 playlist_url, playlist_id, timestamp))
830 except Exception as e:
831 # One playlist URL may fail with 500 while, at the same time,
832 # the other one may work fine (e.g.
833 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
834 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
835 continue
836 raise
78f9d843
S
837
838 if entries:
78f9d843
S
839 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
840
841 # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
842 programme_id = self._search_regex(
a1cf3e38 843 [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
22d7368d
S
844 r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
845 r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
78f9d843 846 webpage, 'vpid', default=None)
dab062fb 847
9afa1770
S
848 if programme_id:
849 formats, subtitles = self._download_media_selector(programme_id)
850 self._sort_formats(formats)
851 # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
852 digital_data = self._parse_json(
853 self._search_regex(
854 r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
855 programme_id, fatal=False)
856 page_info = digital_data.get('page', {}).get('pageInfo', {})
857 title = page_info.get('pageName') or self._og_search_title(webpage)
858 description = page_info.get('description') or self._og_search_description(webpage)
859 timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
860 return {
861 'id': programme_id,
862 'title': title,
863 'description': description,
864 'timestamp': timestamp,
865 'formats': formats,
866 'subtitles': subtitles,
867 }
a3bfddfa 868
88ed52ae
S
869 def extract_all(pattern):
870 return list(filter(None, map(
871 lambda s: self._parse_json(s, playlist_id, fatal=False),
872 re.findall(pattern, webpage))))
873
874 # Multiple video article (e.g.
875 # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
22d7368d 876 EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
88ed52ae
S
877 entries = []
878 for match in extract_all(r'new\s+SMP\(({.+?})\)'):
879 embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
880 if embed_url and re.match(EMBED_URL, embed_url):
881 entries.append(embed_url)
882 entries.extend(re.findall(
883 r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
884 if entries:
885 return self.playlist_result(
886 [self.url_result(entry, 'BBCCoUk') for entry in entries],
887 playlist_id, playlist_title, playlist_description)
9afa1770
S
888
889 # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
88ed52ae 890 medias = extract_all(r"data-media-meta='({[^']+})'")
9afa1770
S
891
892 if not medias:
893 # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
a346b1ff
S
894 media_asset = self._search_regex(
895 r'mediaAssetPage\.init\(\s*({.+?}), "/',
896 webpage, 'media asset', default=None)
897 if media_asset:
898 media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
899 medias = []
900 for video in media_asset_page.get('videos', {}).values():
901 medias.extend(video.values())
902
903 if not medias:
904 # Multiple video playlist with single `now playing` entry (e.g.
905 # http://www.bbc.com/news/video_and_audio/must_see/33767813)
906 vxp_playlist = self._parse_json(
9afa1770 907 self._search_regex(
a346b1ff
S
908 r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
909 webpage, 'playlist data'),
9afa1770 910 playlist_id)
a346b1ff
S
911 playlist_medias = []
912 for item in vxp_playlist:
913 media = item.get('media')
914 if not media:
915 continue
916 playlist_medias.append(media)
917 # Download single video if found media with asset id matching the video id from URL
918 if item.get('advert', {}).get('assetId') == playlist_id:
919 medias = [media]
920 break
921 # Fallback to the whole playlist
922 if not medias:
923 medias = playlist_medias
9afa1770
S
924
925 entries = []
926 for num, media_meta in enumerate(medias, start=1):
927 formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
928 if not formats:
929 continue
10273d6e 930 self._sort_formats(formats)
931
9afa1770
S
932 video_id = media_meta.get('externalId')
933 if not video_id:
934 video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
935
936 title = media_meta.get('caption')
937 if not title:
938 title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
939
940 duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
da92eeae 941
9afa1770
S
942 images = []
943 for image in media_meta.get('images', {}).values():
944 images.extend(image.values())
945 if 'image' in media_meta:
946 images.append(media_meta['image'])
947
948 thumbnails = [{
949 'url': image.get('href'),
950 'width': int_or_none(image.get('width')),
951 'height': int_or_none(image.get('height')),
952 } for image in images]
953
954 entries.append({
955 'id': video_id,
10273d6e 956 'title': title,
9afa1770 957 'thumbnails': thumbnails,
10273d6e 958 'duration': duration,
9afa1770 959 'timestamp': timestamp,
10273d6e 960 'formats': formats,
961 'subtitles': subtitles,
a3bfddfa 962 })
10273d6e 963
9afa1770 964 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
a65402ef
YCH
965
966
class BBCCoUkArticleIE(InfoExtractor):
    """Extractor for bbc.co.uk programme article pages.

    An article page is returned as a playlist: every ``<div>`` marked
    ``typeof="Clip"`` contributes one entry pointing at the URL found in
    its ``resource`` attribute.
    """

    # Dots are escaped so they only match a literal '.', and the 'www.'
    # prefix is optional, matching the other bbc.co.uk extractors.
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of the clips embedded in the article at *url*."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        # Guard against a page without an og:description: stripping a
        # missing value would raise AttributeError and abort the
        # otherwise successful extraction.
        if description:
            description = description.strip()

        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
995
996
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    """Common playlist logic for bbc.co.uk listing pages.

    Subclasses are expected to define ``_URL_TEMPLATE``,
    ``_VIDEO_ID_TEMPLATE`` and ``_extract_title_and_description``.
    """

    def _real_extract(self, url):
        """Build a playlist from every video id found on the page at *url*."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # The subclass template is completed with the shared programme id
        # pattern before scanning the page.
        video_id_pattern = self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX

        entries = []
        for video_id in re.findall(video_id_pattern, webpage):
            video_url = self._URL_TEMPLATE % video_id
            entries.append(self.url_result(video_url, BBCCoUkIE.ie_key()))

        title, description = self._extract_title_and_description(webpage)
        return self.playlist_result(entries, playlist_id, title, description)
1011
1012
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
    """Playlist extractor for bbc.co.uk/iplayer/episodes/<id> pages."""

    IE_NAME = 'bbc.co.uk:iplayer:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
    _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
    _TEST = {
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 6,
    }

    def _extract_title_and_description(self, webpage):
        """Return ``(title, description)`` scraped from the page markup.

        The title comes from the ``<h1>`` element and the description from
        the ``<p>`` element with class ``subtitle``; both lookups are
        non-fatal.
        """
        return (
            self._search_regex(
                r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False),
            self._search_regex(
                r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
                webpage, 'description', fatal=False, group='value'),
        )
1034
1035
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    """Playlist extractor for programme episodes/broadcasts/clips listings."""

    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
            'info_dict': {
                'id': 'b05rcz9v',
                'title': 'The Disappearance - Clips - BBC Four',
                'description': 'French thriller serial about a missing teenager.',
            },
            'playlist_mincount': 7,
        },
        {
            'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
            'only_matching': True,
        },
        {
            'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
            'only_matching': True,
        },
        {
            'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
            'only_matching': True,
        },
    ]

    def _extract_title_and_description(self, webpage):
        """Return ``(title, description)`` taken from the page's Open Graph tags."""
        return (
            self._og_search_title(webpage, fatal=False),
            self._og_search_description(webpage),
        )