]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/bbc.py
[common] extract partOfTVSeries info in json-ld
[yt-dlp.git] / youtube_dl / extractor / bbc.py
CommitLineData
9afa1770 1# coding: utf-8
082c6c86
S
2from __future__ import unicode_literals
3
9afa1770 4import re
082c6c86 5
f13b1e7d 6from .common import InfoExtractor
8683b4d8
S
7from ..utils import (
8 ExtractorError,
9afa1770 9 float_or_none,
8683b4d8 10 int_or_none,
9afa1770
S
11 parse_duration,
12 parse_iso8601,
dab062fb 13 unescapeHTML,
8683b4d8 14)
36e6f62c
JMF
15from ..compat import (
16 compat_etree_fromstring,
17 compat_HTTPError,
18)
082c6c86 19
d12a1a47 20
f13b1e7d 21class BBCCoUkIE(InfoExtractor):
082c6c86 22 IE_NAME = 'bbc.co.uk'
2e3fd9ec 23 IE_DESC = 'BBC iPlayer'
22d7368d 24 _ID_REGEX = r'[pb][\da-z]{7}'
f20a11ed
S
25 _VALID_URL = r'''(?x)
26 https?://
27 (?:www\.)?bbc\.co\.uk/
28 (?:
29 programmes/(?!articles/)|
30 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
31 music/clips[/#]|
32 radio/player/
33 )
ded7511a 34 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
f20a11ed 35 ''' % _ID_REGEX
082c6c86 36
d12a1a47 37 _MEDIASELECTOR_URLS = [
26ccc68b
S
38 # Provides HQ HLS streams with even better quality than pc mediaset but fails
39 # with geolocation in some cases when it's even not geo restricted at all (e.g.
d781e293 40 # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
d1c694ea 41 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
d12a1a47
S
42 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
43 ]
a8b081a0 44
e6174ee9
S
45 _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
46 _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
47
48 _NAMESPACES = (
49 _MEDIASELECTION_NS,
50 _EMP_PLAYLIST_NS,
51 )
52
2e3fd9ec
S
53 _TESTS = [
54 {
f2d0fc68 55 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
2e3fd9ec 56 'info_dict': {
f2d0fc68 57 'id': 'b039d07m',
b1ea6802 58 'ext': 'flv',
679bacf0 59 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
c4914185 60 'description': 'The Canadian poet and songwriter reflects on his musical career.',
2e3fd9ec
S
61 },
62 'params': {
b1ea6802 63 # rtmp download
2e3fd9ec
S
64 'skip_download': True,
65 }
082c6c86 66 },
2e3fd9ec
S
67 {
68 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
69 'info_dict': {
70 'id': 'b00yng1d',
71 'ext': 'flv',
72 'title': 'The Man in Black: Series 3: The Printed Name',
73 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
74 'duration': 1800,
75 },
76 'params': {
77 # rtmp download
78 'skip_download': True,
c7f0177f
S
79 },
80 'skip': 'Episode is no longer available on BBC iPlayer Radio',
2e3fd9ec
S
81 },
82 {
83 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
84 'info_dict': {
85 'id': 'b00yng1d',
86 'ext': 'flv',
17968e44 87 'title': 'The Voice UK: Series 3: Blind Auditions 5',
611c1dd9 88 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
17968e44 89 'duration': 5100,
2e3fd9ec
S
90 },
91 'params': {
92 # rtmp download
93 'skip_download': True,
94 },
b1ea6802 95 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
c056efa2
S
96 },
97 {
98 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
99 'info_dict': {
100 'id': 'b03k3pb7',
101 'ext': 'flv',
102 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
103 'description': '2. Invasion',
104 'duration': 3600,
105 },
106 'params': {
107 # rtmp download
108 'skip_download': True,
109 },
b1ea6802 110 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
ae6986fb
S
111 }, {
112 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
113 'info_dict': {
114 'id': 'b04v209v',
115 'ext': 'flv',
116 'title': 'Pete Tong, The Essential New Tune Special',
117 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
118 'duration': 10800,
119 },
120 'params': {
121 # rtmp download
122 'skip_download': True,
a3ef0e1c
YCH
123 },
124 'skip': 'Episode is no longer available on BBC iPlayer Radio',
c7e67594 125 }, {
5aa535c3 126 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
c7e67594
S
127 'note': 'Audio',
128 'info_dict': {
5aa535c3 129 'id': 'p022h44j',
b1ea6802 130 'ext': 'flv',
5aa535c3
S
131 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
132 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
133 'duration': 227,
c7e67594
S
134 },
135 'params': {
b1ea6802 136 # rtmp download
c7e67594
S
137 'skip_download': True,
138 }
139 }, {
140 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
141 'note': 'Video',
142 'info_dict': {
143 'id': 'p025c103',
b1ea6802 144 'ext': 'flv',
c7e67594
S
145 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
146 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
147 'duration': 226,
148 },
149 'params': {
b1ea6802 150 # rtmp download
c7e67594
S
151 'skip_download': True,
152 }
e68ae99a
S
153 }, {
154 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
155 'info_dict': {
156 'id': 'p02n76xf',
157 'ext': 'flv',
158 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
159 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
160 'duration': 3540,
161 },
162 'params': {
163 # rtmp download
164 'skip_download': True,
165 },
b1ea6802 166 'skip': 'geolocation',
25fa8d66
YCH
167 }, {
168 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
169 'info_dict': {
170 'id': 'b05zmgw1',
171 'ext': 'flv',
172 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
173 'title': 'Royal Academy Summer Exhibition',
174 'duration': 3540,
175 },
176 'params': {
177 # rtmp download
178 'skip_download': True,
179 },
b1ea6802 180 'skip': 'geolocation',
54914380
S
181 }, {
182 # iptv-all mediaset fails with geolocation however there is no geo restriction
183 # for this programme at all
5aa535c3 184 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
54914380 185 'info_dict': {
5aa535c3 186 'id': 'b06rkms3',
54914380 187 'ext': 'flv',
5aa535c3
S
188 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
189 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
54914380
S
190 },
191 'params': {
192 # rtmp download
193 'skip_download': True,
194 },
b1ea6802 195 'skip': 'Now it\'s really geo-restricted',
1ac6e794
S
196 }, {
197 # compact player (https://github.com/rg3/youtube-dl/issues/8147)
198 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
199 'info_dict': {
200 'id': 'p028bfkj',
b1ea6802 201 'ext': 'flv',
1ac6e794
S
202 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
203 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
204 },
205 'params': {
b1ea6802 206 # rtmp download
1ac6e794
S
207 'skip_download': True,
208 },
31763975
S
209 }, {
210 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
211 'only_matching': True,
c7e67594
S
212 }, {
213 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
214 'only_matching': True,
0692ef86
S
215 }, {
216 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
217 'only_matching': True,
f20a11ed
S
218 }, {
219 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
220 'only_matching': True,
ae6986fb 221 }
2e3fd9ec
S
222 ]
223
d12a1a47
S
class MediaSelectionError(Exception):
    """Signals an ``error`` element found in a media selection document.

    ``id`` carries the value of that element's ``id`` attribute.
    """

    def __init__(self, id):
        # Exception.__new__ has already recorded (id,) as ``args``; the
        # explicit base call just makes that visible here.
        Exception.__init__(self, id)
        self.id = id
227
2e3fd9ec
S
def _extract_asx_playlist(self, connection, programme_id):
    """Download the ASX playlist a connection points at and list its refs.

    Returns the ``href`` attribute of every ``./Entry/ref`` element of the
    downloaded document, in document order.
    """
    playlist_url = connection.get('href')
    asx_doc = self._download_xml(
        playlist_url, programme_id, 'Downloading ASX playlist')
    hrefs = []
    for ref_el in asx_doc.findall('./Entry/ref'):
        hrefs.append(ref_el.get('href'))
    return hrefs
231
def _extract_items(self, playlist):
    """Return the direct ``item`` children of *playlist* in the EMP playlist namespace."""
    item_xpath = './{%s}item' % self._EMP_PLAYLIST_NS
    return playlist.findall(item_xpath)
234
def _findall_ns(self, element, xpath):
    """Run *xpath* against *element* once per known namespace.

    *xpath* is a %-format template with a single ``%s`` slot that receives
    each entry of ``self._NAMESPACES`` in turn; the matches are concatenated
    in namespace order.
    """
    return [
        match
        for namespace in self._NAMESPACES
        for match in element.findall(xpath % namespace)
    ]
2e3fd9ec
S
240
def _extract_medias(self, media_selection):
    """Return the ``media`` children of a media selection document.

    A direct ``error`` child is looked up first under the media selection
    namespace and then under the EMP playlist namespace.

    Raises:
        MediaSelectionError: carrying the error element's ``id`` attribute,
            if an ``error`` child exists in either namespace.
    """
    error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
    if error is None:
        # The result of this fallback lookup used to be thrown away, so an
        # error reported under the EMP playlist namespace was never raised.
        error = media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
    if error is not None:
        # Resolved through the instance; this is the exception class nested
        # in BBCCoUkIE, which subclasses inherit.
        raise self.MediaSelectionError(error.get('id'))
    return self._findall_ns(media_selection, './{%s}media')
2e3fd9ec
S
248
def _extract_connections(self, media):
    """Return the ``connection`` children of *media*, looked up via ``_findall_ns``."""
    connection_xpath = './{%s}connection'
    return self._findall_ns(media, connection_xpath)
2e3fd9ec 251
def _get_subtitles(self, media, programme_id):
    """Build the subtitles mapping for a captions media element.

    Each connection's captions document is downloaded only to read its
    ``xml:lang`` attribute (defaulting to 'en'); the subtitle entry itself
    points at the connection URL with the 'ttml' extension. A later
    connection with the same language replaces an earlier one.
    """
    xml_lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'
    by_language = {}
    for conn in self._extract_connections(media):
        captions_doc = self._download_xml(
            conn.get('href'), programme_id, 'Downloading captions')
        language = captions_doc.get(xml_lang_attr, 'en')
        by_language[language] = [{
            'url': conn.get('href'),
            'ext': 'ttml',
        }]
    return by_language
082c6c86 264
d12a1a47
S
def _raise_extractor_error(self, media_selection_error):
    """Raise an expected ExtractorError describing a media selection error.

    The message combines ``self.IE_NAME`` with the error's ``id``.
    """
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    raise ExtractorError(message, expected=True)
269
def _download_media_selector(self, programme_id):
    """Try each media selector URL in order and return the first result.

    A MediaSelectionError whose id is 'notukerror', 'geolocation' or
    'selectionunavailable' moves on to the next URL; any other id is
    reported immediately through ``_raise_extractor_error``. When every URL
    was skipped this way, the last such error is reported.
    """
    skippable_ids = ('notukerror', 'geolocation', 'selectionunavailable')
    last_error = None
    for url_template in self._MEDIASELECTOR_URLS:
        selector_url = url_template % programme_id
        try:
            return self._download_media_selector_url(selector_url, programme_id)
        except BBCCoUkIE.MediaSelectionError as err:
            if err.id in skippable_ids:
                last_error = err
            else:
                self._raise_extractor_error(err)
    self._raise_extractor_error(last_error)
9afa1770
S
282
def _download_media_selector_url(self, url, programme_id=None):
    """Download one media selection document and process it.

    When the download fails with an ExtractorError caused by an HTTP 403 or
    404, the error response body is parsed as the media selection document
    instead; any other ExtractorError propagates unchanged.
    """
    try:
        media_selection = self._download_xml(
            url, programme_id, 'Downloading media selection XML')
    except ExtractorError as ee:
        http_error = ee.cause
        body_is_selection = (
            isinstance(http_error, compat_HTTPError)
            and http_error.code in (403, 404))
        if not body_is_selection:
            raise
        media_selection = compat_etree_fromstring(
            http_error.read().decode('utf-8'))
    return self._process_media_selector(media_selection, programme_id)
082c6c86 293
def _process_media_selector(self, media_selection, programme_id):
    """Turn a media selection document into ``(formats, subtitles)``.

    Video and audio medias contribute formats per connection: ASX refs,
    DASH/HLS/HDS manifest formats, or a single plain entry (with a URL only
    for the http and rtmp protocols). A captions media sets the subtitles;
    ``subtitles`` stays None when there is none.
    """
    formats = []
    subtitles = None

    for media in self._extract_medias(media_selection):
        kind = media.get('kind')
        if kind == 'captions':
            subtitles = self.extract_subtitles(media, programme_id)
            continue
        if kind not in ('video', 'audio'):
            continue

        bitrate = int_or_none(media.get('bitrate'))
        encoding = media.get('encoding')
        service = media.get('service')
        width = int_or_none(media.get('width'))
        height = int_or_none(media.get('height'))
        file_size = int_or_none(media.get('media_file_size'))

        for connection in self._extract_connections(media):
            conn_kind = connection.get('kind')
            protocol = connection.get('protocol')
            supplier = connection.get('supplier')
            href = connection.get('href')
            transfer_format = connection.get('transferFormat')

            format_id = supplier or conn_kind or protocol
            if service:
                format_id = '%s_%s' % (service, format_id)

            if supplier == 'asx':
                # ASX playlist: one format per referenced stream
                asx_refs = self._extract_asx_playlist(connection, programme_id)
                for index, ref in enumerate(asx_refs):
                    formats.append({
                        'url': ref,
                        'format_id': 'ref%s_%s' % (index, format_id),
                    })
                continue
            if transfer_format == 'dash':
                formats.extend(self._extract_mpd_formats(
                    href, programme_id, mpd_id=format_id, fatal=False))
                continue
            if transfer_format == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False))
                continue
            if transfer_format == 'hds':
                formats.extend(self._extract_f4m_formats(
                    href, programme_id, f4m_id=format_id, fatal=False))
                continue

            # Plain entry described directly by the media/connection attributes
            fmt = {
                'format_id': format_id,
                'filesize': file_size,
            }
            if kind == 'video':
                fmt['width'] = width
                fmt['height'] = height
                fmt['vbr'] = bitrate
                fmt['vcodec'] = encoding
            else:
                fmt['abr'] = bitrate
                fmt['acodec'] = encoding
                fmt['vcodec'] = 'none'

            if protocol == 'http':
                # Direct link
                fmt['url'] = href
            elif protocol == 'rtmp':
                application = connection.get('application', 'ondemand')
                auth_string = connection.get('authString')
                identifier = connection.get('identifier')
                server = connection.get('server')
                fmt['url'] = '%s://%s/%s?%s' % (protocol, server, application, auth_string)
                fmt['play_path'] = identifier
                fmt['app'] = '%s?%s' % (application, auth_string)
                fmt['page_url'] = 'http://www.bbc.co.uk'
                fmt['player_url'] = 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf'
                fmt['rtmp_live'] = False
                fmt['ext'] = 'flv'
            formats.append(fmt)

    return formats, subtitles
2e3fd9ec 374
ae6986fb
S
def _download_playlist(self, playlist_id):
    """Resolve a programme through its playlist.json, else the legacy playlist.

    Returns a ``(programme_id, title, description, duration, formats,
    subtitles)`` tuple. The legacy XML playlist is used when the JSON has no
    ``defaultAvailableVersion`` or when an ExtractorError caused by an HTTP
    404 is raised inside the JSON path; other ExtractorErrors propagate.
    """
    playlist_url = 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id
    try:
        playlist = self._download_json(
            playlist_url, playlist_id, 'Downloading playlist JSON')

        version = playlist.get('defaultAvailableVersion')
        if version:
            smp_config = version['smpConfig']
            title = smp_config['title']
            description = smp_config['summary']
            for item in smp_config['items']:
                if item['kind'] not in ('programme', 'radioProgramme'):
                    continue
                programme_id = item.get('vpid')
                duration = int_or_none(item.get('duration'))
                formats, subtitles = self._download_media_selector(programme_id)
            # NOTE(review): return placed after the loop (last matching item
            # wins); with no matching item these names are unbound - confirm
            # against the original indentation.
            return programme_id, title, description, duration, formats, subtitles
    except ExtractorError as ee:
        is_not_found = isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404
        if not is_not_found:
            raise

    # fallback to legacy playlist
    return self._process_legacy_playlist(playlist_id)
400
def _process_legacy_playlist_url(self, url, display_id):
    """Download the legacy playlist at *url* and extract programme info from it."""
    return self._extract_from_legacy_playlist(
        self._download_legacy_playlist_url(url, display_id), display_id)
404
def _process_legacy_playlist(self, playlist_id):
    """Process the legacy iPlayer playlist URL built from *playlist_id*."""
    legacy_url = 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id
    return self._process_legacy_playlist_url(legacy_url, playlist_id)
408
def _download_legacy_playlist_url(self, url, playlist_id=None):
    """Download and return the legacy playlist XML found at *url*."""
    note = 'Downloading legacy playlist XML'
    return self._download_xml(url, playlist_id, note)
ae6986fb 412
def _extract_from_legacy_playlist(self, playlist, playlist_id):
    """Extract programme info from a legacy (EMP namespace) XML playlist.

    Returns:
        A ``(programme_id, title, description, duration, formats,
        subtitles)`` tuple built from the playlist's 'programme' or
        'radioProgramme' item(s); values from a later matching item replace
        those of an earlier one.

    Raises:
        ExtractorError: (expected) when the playlist has a ``noItems``
            element; the message depends on its ``reason`` attribute.
    """
    ns = self._EMP_PLAYLIST_NS

    no_items = playlist.find('./{%s}noItems' % ns)
    if no_items is not None:
        reason = no_items.get('reason')
        if reason == 'preAvailability':
            msg = 'Episode %s is not yet available' % playlist_id
        elif reason == 'postAvailability':
            msg = 'Episode %s is no longer available' % playlist_id
        elif reason == 'noMedia':
            msg = 'Episode %s is not currently available' % playlist_id
        else:
            msg = 'Episode %s is not available: %s' % (playlist_id, reason)
        raise ExtractorError(msg, expected=True)

    def get_from_attributes(element):
        # First 'identifier'/'group' attribute that looks like a BBC pid
        for attr in ('identifier', 'group'):
            value = element.get(attr)
            if value and re.match(r'^[pb][\da-z]{7}$', value):
                return value
        return None

    def get_programme_id(item):
        # The id found on the item itself used to be computed and then
        # discarded; it is now returned before falling back to <mediator>.
        programme_id = get_from_attributes(item)
        if programme_id:
            return programme_id
        mediator = item.find('./{%s}mediator' % ns)
        if mediator is not None:
            return get_from_attributes(mediator)
        return None

    for item in self._extract_items(playlist):
        kind = item.get('kind')
        if kind != 'programme' and kind != 'radioProgramme':
            continue
        title = playlist.find('./{%s}title' % ns).text
        description_el = playlist.find('./{%s}summary' % ns)
        description = description_el.text if description_el is not None else None

        programme_id = get_programme_id(item)
        duration = int_or_none(item.get('duration'))

        if programme_id:
            formats, subtitles = self._download_media_selector(programme_id)
        else:
            # No usable id: the item itself is processed as the media
            # selection and the playlist id stands in for the programme id.
            formats, subtitles = self._process_media_selector(item, playlist_id)
            programme_id = playlist_id

    # NOTE(review): return placed after the loop (last matching item wins);
    # with no matching item these names are unbound - confirm against the
    # original indentation.
    return programme_id, title, description, duration, formats, subtitles
456
c056efa2
S
def _real_extract(self, url):
    """Extract a single programme from a bbc.co.uk page.

    The vpid is taken from the ``mediator.bind(...)`` player JSON or, failing
    that, from a ``"vpid"`` entry in the page. With a vpid, formats come from
    the media selector and title/description from the page; without one,
    everything comes from ``_download_playlist`` for the id in the URL.
    """
    group_id = self._match_id(url)
    webpage = self._download_webpage(url, group_id, 'Downloading video page')

    programme_id = None
    duration = None

    tviplayer = self._search_regex(
        r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
        webpage, 'player', default=None)
    if tviplayer:
        player = self._parse_json(tviplayer, group_id).get('player', {})
        duration = int_or_none(player.get('duration'))
        programme_id = player.get('vpid')

    if not programme_id:
        vpid_pattern = r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX
        programme_id = self._search_regex(
            vpid_pattern, webpage, 'vpid', fatal=False, default=None)

    if not programme_id:
        (programme_id, title, description,
         duration, formats, subtitles) = self._download_playlist(group_id)
    else:
        formats, subtitles = self._download_media_selector(programme_id)
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = self._html_search_regex(
                (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
                 r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
        description = self._search_regex(
            (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
             r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
            webpage, 'description', default=None) or self._html_search_meta(
                'description', webpage)

    self._sort_formats(formats)

    info = {
        'id': programme_id,
        'title': title,
        'description': description,
        'thumbnail': self._og_search_thumbnail(webpage, default=None),
        'duration': duration,
        'formats': formats,
        'subtitles': subtitles,
    }
    return info
10273d6e 503
504
9afa1770
S
505class BBCIE(BBCCoUkIE):
506 IE_NAME = 'bbc'
507 IE_DESC = 'BBC'
508 _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
10273d6e 509
d12a1a47 510 _MEDIASELECTOR_URLS = [
55ebae26
S
511 # Provides HQ HLS streams but fails with geolocation in some cases even when
512 # the content is not geo restricted at all
513 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
d12a1a47
S
514 # Provides more formats, namely direct mp4 links, but fails on some videos with
515 # notukerror for non UK (?) users (e.g.
516 # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
517 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
518 # Provides fewer formats, but works everywhere for everybody (hopefully)
519 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
520 ]
10273d6e 521
522 _TESTS = [{
6a747190 523 # article with multiple videos embedded with data-playable containing vpids
10273d6e 524 'url': 'http://www.bbc.com/news/world-europe-32668511',
525 'info_dict': {
526 'id': 'world-europe-32668511',
527 'title': 'Russia stages massive WW2 parade despite Western boycott',
9afa1770 528 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
10273d6e 529 },
530 'playlist_count': 2,
a3bfddfa 531 }, {
6a747190 532 # article with multiple videos embedded with data-playable (more videos)
10273d6e 533 'url': 'http://www.bbc.com/news/business-28299555',
534 'info_dict': {
535 'id': 'business-28299555',
536 'title': 'Farnborough Airshow: Video highlights',
9afa1770 537 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
10273d6e 538 },
539 'playlist_count': 9,
9afa1770 540 'skip': 'Save time',
88ed52ae
S
541 }, {
542 # article with multiple videos embedded with `new SMP()`
6a747190 543 # broken
88ed52ae
S
544 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
545 'info_dict': {
546 'id': '3662a707-0af9-3149-963f-47bea720b460',
b7d7674f 547 'title': 'BUGGER',
88ed52ae
S
548 },
549 'playlist_count': 18,
a3bfddfa 550 }, {
6a747190 551 # single video embedded with data-playable containing vpid
10273d6e 552 'url': 'http://www.bbc.com/news/world-europe-32041533',
10273d6e 553 'info_dict': {
554 'id': 'p02mprgb',
55ebae26 555 'ext': 'mp4',
10273d6e 556 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
55ebae26 557 'description': 'md5:2868290467291b37feda7863f7a83f54',
10273d6e 558 'duration': 47,
9afa1770 559 'timestamp': 1427219242,
da92eeae 560 'upload_date': '20150324',
10273d6e 561 },
562 'params': {
9afa1770 563 # rtmp download
10273d6e 564 'skip_download': True,
565 }
a3bfddfa 566 }, {
6a747190
S
567 # article with single video embedded with data-playable containing XML playlist
568 # with direct video links as progressiveDownloadUrl (for now these are extracted)
569 # and playlist with f4m and m3u8 as streamingUrl
de939d89 570 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
de939d89 571 'info_dict': {
9afa1770 572 'id': '150615_telabyad_kentin_cogu',
de939d89 573 'ext': 'mp4',
05087d1b
S
574 'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
575 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
9afa1770 576 'timestamp': 1434397334,
da92eeae 577 'upload_date': '20150615',
de939d89 578 },
579 'params': {
580 'skip_download': True,
581 }
c936d8cc 582 }, {
6a747190 583 # single video embedded with data-playable containing XML playlists (regional section)
de939d89 584 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 585 'info_dict': {
9afa1770 586 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
de939d89 587 'ext': 'mp4',
9afa1770 588 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
05087d1b 589 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
9afa1770 590 'timestamp': 1434713142,
da92eeae 591 'upload_date': '20150619',
de939d89 592 },
593 'params': {
594 'skip_download': True,
595 }
a346b1ff
S
596 }, {
597 # single video from video playlist embedded with vxp-playlist-data JSON
598 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
599 'info_dict': {
600 'id': 'p02w6qjc',
55ebae26 601 'ext': 'mp4',
a346b1ff
S
602 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
603 'duration': 56,
0bc4ee60 604 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
a346b1ff
S
605 },
606 'params': {
607 'skip_download': True,
608 }
9afa1770
S
609 }, {
610 # single video story with digitalData
611 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
612 'info_dict': {
613 'id': 'p02q6gc4',
614 'ext': 'flv',
615 'title': 'Sri Lanka’s spicy secret',
616 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
617 'timestamp': 1437674293,
618 'upload_date': '20150723',
619 },
620 'params': {
621 # rtmp download
622 'skip_download': True,
623 }
624 }, {
625 # single video story without digitalData
626 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
627 'info_dict': {
628 'id': 'p018zqqg',
55ebae26 629 'ext': 'mp4',
9afa1770
S
630 'title': 'Hyundai Santa Fe Sport: Rock star',
631 'description': 'md5:b042a26142c4154a6e472933cf20793d',
ae8bdfd1
S
632 'timestamp': 1415867444,
633 'upload_date': '20141113',
9afa1770
S
634 },
635 'params': {
636 # rtmp download
637 'skip_download': True,
638 }
639 }, {
6a747190 640 # single video with playlist.sxml URL in playlist param
9afa1770
S
641 'url': 'http://www.bbc.com/sport/0/football/33653409',
642 'info_dict': {
643 'id': 'p02xycnp',
55ebae26 644 'ext': 'mp4',
9afa1770 645 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
7033bc1a 646 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
9afa1770
S
647 'duration': 140,
648 },
649 'params': {
650 # rtmp download
651 'skip_download': True,
652 }
b5d48cb1 653 }, {
6a747190 654 # article with multiple videos embedded with playlist.sxml in playlist param
b5d48cb1
S
655 'url': 'http://www.bbc.com/sport/0/football/34475836',
656 'info_dict': {
657 'id': '34475836',
450b233c 658 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
8c65e4a5 659 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
b5d48cb1
S
660 },
661 'playlist_count': 3,
450b233c
S
662 }, {
663 # school report article with single video
664 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
665 'info_dict': {
666 'id': '35744779',
667 'title': 'School which breaks down barriers in Jerusalem',
668 },
669 'playlist_count': 1,
9afa1770
S
670 }, {
671 # single video with playlist URL from weather section
672 'url': 'http://www.bbc.com/weather/features/33601775',
673 'only_matching': True,
674 }, {
675 # custom redirection to www.bbc.com
676 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
677 'only_matching': True,
a1cf3e38
S
678 }, {
679 # single video article embedded with data-media-vpid
680 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
681 'only_matching': True,
10273d6e 682 }]
683
9afa1770
S
@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle *url*.

    Returns False as soon as one of the listed bbc.co.uk extractors claims
    the URL; otherwise defers to the inherited ``suitable``.
    """
    excluded_extractors = (
        BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
    for extractor in excluded_extractors:
        if extractor.suitable(url):
            return False
    return super(BBCIE, cls).suitable(url)
9afa1770
S
689
def _extract_from_media_meta(self, media_meta, video_id):
    """Derive ``(formats, subtitles)`` from a media metadata dict.

    Sources are tried in this order: direct links in ``sourceFiles``, a
    media selector lookup for ``externalId``, then the playlist.sxml at
    ``href`` processed as a legacy playlist. ``([], [])`` is returned when
    none of them is present.
    """
    # Direct links to media in media metadata (e.g.
    # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
    # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
    source_files = media_meta.get('sourceFiles')
    if source_files:
        direct_formats = []
        for format_id, f in source_files.items():
            if not f.get('url'):
                continue
            direct_formats.append({
                'url': f['url'],
                'format_id': format_id,
                'ext': f.get('encoding'),
                'tbr': float_or_none(f.get('bitrate'), 1000),
                'filesize': int_or_none(f.get('filesize')),
            })
        return direct_formats, []

    programme_id = media_meta.get('externalId')
    if programme_id:
        return self._download_media_selector(programme_id)

    # Process playlist.sxml as legacy playlist
    href = media_meta.get('href')
    if not href:
        return [], []
    playlist = self._download_legacy_playlist_url(href)
    (_pid, _title, _description, _duration,
     formats, subtitles) = self._extract_from_legacy_playlist(playlist, video_id)
    return formats, subtitles
716
baf39a1a
S
def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
    """Build an info dict from the legacy playlist (playlist.sxml) at *url*.

    Formats are sorted via ``_sort_formats``; *timestamp* is passed through
    unchanged into the result.
    """
    extracted = self._process_legacy_playlist_url(url, playlist_id)
    programme_id, title, description, duration, formats, subtitles = extracted
    self._sort_formats(formats)
    info = {
        'id': programme_id,
        'title': title,
        'description': description,
        'duration': duration,
        'timestamp': timestamp,
        'formats': formats,
        'subtitles': subtitles,
    }
    return info
730
10273d6e 731 def _real_extract(self, url):
9afa1770
S
732 playlist_id = self._match_id(url)
733
734 webpage = self._download_webpage(url, playlist_id)
735
350e02d4
YCH
736 json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
737 timestamp = json_ld_info.get('timestamp')
0e832c2c 738
350e02d4 739 playlist_title = json_ld_info.get('title')
0e832c2c
S
740 if not playlist_title:
741 playlist_title = self._og_search_title(
742 webpage, default=None) or self._html_search_regex(
743 r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
744 if playlist_title:
745 playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
746
747 playlist_description = json_ld_info.get(
748 'description') or self._og_search_description(webpage, default=None)
ae8bdfd1
S
749
750 if not timestamp:
751 timestamp = parse_iso8601(self._search_regex(
752 [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
753 r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
6f789365 754 r'"datePublished":\s*"([^"]+)'],
ae8bdfd1 755 webpage, 'date', default=None))
9afa1770 756
78f9d843
S
757 entries = []
758
de665713
S
759 # article with multiple videos embedded with playlist.sxml (e.g.
760 # http://www.bbc.com/sport/0/football/34475836)
761 playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
222e11d4 762 playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
de665713 763 if playlists:
baf39a1a
S
764 entries = [
765 self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
766 for playlist_url in playlists]
de939d89 767
78f9d843
S
768 # news article with multiple videos embedded with data-playable
769 data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
770 if data_playables:
771 for _, data_playable_json in data_playables:
772 data_playable = self._parse_json(
773 unescapeHTML(data_playable_json), playlist_id, fatal=False)
774 if not data_playable:
775 continue
baf39a1a
S
776 settings = data_playable.get('settings', {})
777 if settings:
78f9d843
S
778 # data-playable with video vpid in settings.playlistObject.items (e.g.
779 # http://www.bbc.com/news/world-us-canada-34473351)
baf39a1a
S
780 playlist_object = settings.get('playlistObject', {})
781 if playlist_object:
782 items = playlist_object.get('items')
783 if items and isinstance(items, list):
78f9d843
S
784 title = playlist_object['title']
785 description = playlist_object.get('summary')
baf39a1a
S
786 duration = int_or_none(items[0].get('duration'))
787 programme_id = items[0].get('vpid')
78f9d843
S
788 formats, subtitles = self._download_media_selector(programme_id)
789 self._sort_formats(formats)
790 entries.append({
791 'id': programme_id,
792 'title': title,
793 'description': description,
794 'timestamp': timestamp,
795 'duration': duration,
796 'formats': formats,
797 'subtitles': subtitles,
798 })
799 else:
800 # data-playable without vpid but with a playlist.sxml URLs
801 # in otherSettings.playlist (e.g.
802 # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
803 playlist = data_playable.get('otherSettings', {}).get('playlist', {})
804 if playlist:
a7e5f274
RA
805 entry = None
806 for key in ('streaming', 'progressiveDownload'):
05087d1b
S
807 playlist_url = playlist.get('%sUrl' % key)
808 if not playlist_url:
809 continue
810 try:
a7e5f274
RA
811 info = self._extract_from_playlist_sxml(
812 playlist_url, playlist_id, timestamp)
813 if not entry:
814 entry = info
815 else:
816 entry['title'] = info['title']
817 entry['formats'].extend(info['formats'])
05087d1b
S
818 except Exception as e:
819 # Some playlist URL may fail with 500, at the same time
820 # the other one may work fine (e.g.
821 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
822 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
823 continue
824 raise
a7e5f274
RA
825 if entry:
826 self._sort_formats(entry['formats'])
827 entries.append(entry)
78f9d843
S
828
829 if entries:
78f9d843
S
830 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
831
832 # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
833 programme_id = self._search_regex(
a1cf3e38 834 [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
22d7368d
S
835 r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
836 r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
78f9d843 837 webpage, 'vpid', default=None)
dab062fb 838
9afa1770
S
839 if programme_id:
840 formats, subtitles = self._download_media_selector(programme_id)
841 self._sort_formats(formats)
842 # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
843 digital_data = self._parse_json(
844 self._search_regex(
845 r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
846 programme_id, fatal=False)
847 page_info = digital_data.get('page', {}).get('pageInfo', {})
848 title = page_info.get('pageName') or self._og_search_title(webpage)
849 description = page_info.get('description') or self._og_search_description(webpage)
850 timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
851 return {
852 'id': programme_id,
853 'title': title,
854 'description': description,
855 'timestamp': timestamp,
856 'formats': formats,
857 'subtitles': subtitles,
858 }
a3bfddfa 859
88ed52ae
S
860 def extract_all(pattern):
861 return list(filter(None, map(
862 lambda s: self._parse_json(s, playlist_id, fatal=False),
863 re.findall(pattern, webpage))))
864
865 # Multiple video article (e.g.
866 # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
22d7368d 867 EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
88ed52ae
S
868 entries = []
869 for match in extract_all(r'new\s+SMP\(({.+?})\)'):
870 embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
871 if embed_url and re.match(EMBED_URL, embed_url):
872 entries.append(embed_url)
873 entries.extend(re.findall(
874 r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
875 if entries:
876 return self.playlist_result(
877 [self.url_result(entry, 'BBCCoUk') for entry in entries],
878 playlist_id, playlist_title, playlist_description)
9afa1770
S
879
880 # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
88ed52ae 881 medias = extract_all(r"data-media-meta='({[^']+})'")
9afa1770
S
882
883 if not medias:
884 # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
a346b1ff
S
885 media_asset = self._search_regex(
886 r'mediaAssetPage\.init\(\s*({.+?}), "/',
887 webpage, 'media asset', default=None)
888 if media_asset:
889 media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
890 medias = []
891 for video in media_asset_page.get('videos', {}).values():
892 medias.extend(video.values())
893
894 if not medias:
895 # Multiple video playlist with single `now playing` entry (e.g.
896 # http://www.bbc.com/news/video_and_audio/must_see/33767813)
897 vxp_playlist = self._parse_json(
9afa1770 898 self._search_regex(
a346b1ff
S
899 r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
900 webpage, 'playlist data'),
9afa1770 901 playlist_id)
a346b1ff
S
902 playlist_medias = []
903 for item in vxp_playlist:
904 media = item.get('media')
905 if not media:
906 continue
907 playlist_medias.append(media)
908 # Download single video if found media with asset id matching the video id from URL
909 if item.get('advert', {}).get('assetId') == playlist_id:
910 medias = [media]
911 break
912 # Fallback to the whole playlist
913 if not medias:
914 medias = playlist_medias
9afa1770
S
915
916 entries = []
917 for num, media_meta in enumerate(medias, start=1):
918 formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
919 if not formats:
920 continue
10273d6e 921 self._sort_formats(formats)
922
9afa1770
S
923 video_id = media_meta.get('externalId')
924 if not video_id:
925 video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
926
927 title = media_meta.get('caption')
928 if not title:
929 title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
930
931 duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
da92eeae 932
9afa1770
S
933 images = []
934 for image in media_meta.get('images', {}).values():
935 images.extend(image.values())
936 if 'image' in media_meta:
937 images.append(media_meta['image'])
938
939 thumbnails = [{
940 'url': image.get('href'),
941 'width': int_or_none(image.get('width')),
942 'height': int_or_none(image.get('height')),
943 } for image in images]
944
945 entries.append({
946 'id': video_id,
10273d6e 947 'title': title,
9afa1770 948 'thumbnails': thumbnails,
10273d6e 949 'duration': duration,
9afa1770 950 'timestamp': timestamp,
10273d6e 951 'formats': formats,
952 'subtitles': subtitles,
a3bfddfa 953 })
10273d6e 954
9afa1770 955 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
a65402ef
YCH
956
957
class BBCCoUkArticleIE(InfoExtractor):
    # Extractor for bbc.co.uk programme articles. Every element of the form
    # <div typeof="Clip" resource="..."> found on the article page becomes
    # one URL entry of the returned playlist.

    # The dots are escaped so that they match only a literal '.', and the
    # 'www.' prefix is optional, like in the other bbc.co.uk extractors of
    # this module.
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        # The article id captured by _VALID_URL doubles as the playlist id.
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)

        # NOTE(review): the og:description lookup appears to be non-fatal and
        # may therefore yield None when the page has no such tag; only strip
        # an actual string instead of failing with AttributeError.
        description = self._og_search_description(webpage)
        if description is not None:
            description = description.strip()

        # The "resource" attribute of each clip <div> holds the clip URL;
        # resolving it is delegated to whichever extractor matches that URL.
        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
ded7511a
S
986
987
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    # Shared implementation for bbc.co.uk playlist pages. A subclass has to
    # provide:
    #   _URL_TEMPLATE       - %-template turning a video id into a page URL
    #   _VIDEO_ID_TEMPLATE  - %-template turning BBCCoUkIE._ID_REGEX into the
    #                         regex that locates video ids in the webpage
    #   _extract_title_and_description(webpage) -> (title, description)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Build the id-matching regex once, then turn every id found in the
        # page into a URL result handled by BBCCoUkIE.
        id_pattern = self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX
        entries = []
        for video_id in re.findall(id_pattern, webpage):
            entries.append(self.url_result(
                self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()))

        title, description = self._extract_title_and_description(webpage)

        return self.playlist_result(entries, playlist_id, title, description)
1002
1003
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
    # Playlist extractor for iPlayer "episodes" and "group" pages. Video ids
    # are read from data-ip-id attributes and mapped to iPlayer episode URLs.
    IE_NAME = 'bbc.co.uk:iplayer:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
    _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 6,
        'skip': 'This programme is not currently available on BBC iPlayer',
    }, {
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }]

    def _extract_title_and_description(self, webpage):
        # Title: text of the first attribute-less <h1> element.
        heading = self._search_regex(
            r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
        # Description: text of the <p> whose class attribute is "subtitle"
        # (either quote style); only the named group "value" is returned.
        subtitle = self._search_regex(
            r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
            webpage, 'description', fatal=False, group='value')
        return heading, subtitle
1035
1036
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    # Playlist extractor for /programmes/<id>/{episodes,broadcasts,clips}
    # pages. Video ids are read from data-pid attributes and mapped to
    # /programmes/<id> URLs.
    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        # Both values come from the page's Open Graph metadata; the title
        # lookup is explicitly non-fatal.
        return (
            self._og_search_title(webpage, fatal=False),
            self._og_search_description(webpage),
        )