1 import xml
.etree
.ElementTree
7 from .common
import InfoExtractor
36 class BBCCoUkIE(InfoExtractor
):
38 IE_DESC
= 'BBC iPlayer'
39 _ID_REGEX
= r
'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
42 (?:www\.)?bbc\.co\.uk/
44 programmes/(?!articles/)|
45 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
46 music/(?:clips|audiovideo/popular)[/#]|
49 events/[^/]+/play/[^/]+/
51 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
54 _LOGIN_URL
= 'https://account.bbc.com/signin'
55 _NETRC_MACHINE
= 'bbc'
57 _MEDIA_SELECTOR_URL_TEMPL
= 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
# Provides HQ HLS streams with even better quality than the pc mediaset, but fails
# with geolocation in some cases even when the programme is not geo restricted at
# all (e.g. http://www.bbc.co.uk/programmes/b06bp7lf). May also fail with
# selectionunavailable.
66 _EMP_PLAYLIST_NS
= 'http://bbc.co.uk/2008/emp/playlist'
70 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
74 'title': 'Kaleidoscope, Leonard Cohen',
75 'description': 'The Canadian poet and songwriter reflects on his musical career.',
79 'skip_download': True,
83 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
87 'title': 'The Man in Black: Series 3: The Printed Name',
88 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
93 'skip_download': True,
95 'skip': 'Episode is no longer available on BBC iPlayer Radio',
98 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
102 'title': 'The Voice UK: Series 3: Blind Auditions 5',
103 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
108 'skip_download': True,
110 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
113 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
117 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
118 'description': '2. Invasion',
123 'skip_download': True,
125 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
127 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
131 'title': 'Pete Tong, The Essential New Tune Special',
132 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
137 'skip_download': True,
139 'skip': 'Episode is no longer available on BBC iPlayer Radio',
141 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
146 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
147 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
152 'skip_download': True,
155 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
160 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
161 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
166 'skip_download': True,
169 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
173 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
174 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
179 'skip_download': True,
181 'skip': 'geolocation',
183 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
187 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
188 'title': 'Royal Academy Summer Exhibition',
193 'skip_download': True,
195 'skip': 'geolocation',
197 # iptv-all mediaset fails with geolocation however there is no geo restriction
198 # for this programme at all
199 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
203 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
204 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
208 'skip_download': True,
210 'skip': 'Now it\'s really geo-restricted',
212 # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
213 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
217 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
218 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
222 'skip_download': True,
225 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
230 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
231 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
236 'skip_download': True,
239 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
240 'only_matching': True,
242 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
243 'only_matching': True,
245 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
246 'only_matching': True,
248 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
249 'only_matching': True,
251 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
252 'only_matching': True,
254 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
255 'only_matching': True,
257 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
258 'only_matching': True,
260 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
261 'only_matching': True,
264 def _perform_login(self
, username
, password
):
265 login_page
= self
._download
_webpage
(
266 self
._LOGIN
_URL
, None, 'Downloading signin page')
268 login_form
= self
._hidden
_inputs
(login_page
)
271 'username': username
,
272 'password': password
,
275 post_url
= urljoin(self
._LOGIN
_URL
, self
._search
_regex
(
276 r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', login_page,
277 'post url
', default=self._LOGIN_URL, group='url
'))
279 response, urlh = self._download_webpage_handle(
280 post_url, None, 'Logging
in', data=urlencode_postdata(login_form),
281 headers={'Referer': self._LOGIN_URL})
283 if self._LOGIN_URL in urlh.geturl():
284 error = clean_html(get_element_by_class('form
-message
', response))
286 raise ExtractorError(
287 'Unable to login
: %s' % error, expected=True)
288 raise ExtractorError('Unable to log
in')
290 class MediaSelectionError(Exception):
291 def __init__(self, id):
294 def _extract_asx_playlist(self, connection, programme_id):
295 asx = self._download_xml(connection.get('href
'), programme_id, 'Downloading ASX playlist
')
296 return [ref.get('href
') for ref in asx.findall('./Entry
/ref
')]
298 def _extract_items(self, playlist):
299 return playlist.findall('./{%s}item
' % self._EMP_PLAYLIST_NS)
301 def _extract_medias(self, media_selection):
302 error = media_selection.get('result
')
304 raise BBCCoUkIE.MediaSelectionError(error)
305 return media_selection.get('media
') or []
307 def _extract_connections(self, media):
308 return media.get('connection
') or []
310 def _get_subtitles(self, media, programme_id):
312 for connection in self._extract_connections(media):
313 cc_url = url_or_none(connection.get('href
'))
316 captions = self._download_xml(
317 cc_url, programme_id, 'Downloading captions
', fatal=False)
318 if not isinstance(captions, xml.etree.ElementTree.Element):
322 'url
': connection.get('href
'),
329 def _raise_extractor_error(self, media_selection_error):
330 raise ExtractorError(
331 '%s returned error
: %s' % (self.IE_NAME, media_selection_error.id),
334 def _download_media_selector(self, programme_id):
335 last_exception = None
336 for media_set in self._MEDIA_SETS:
338 return self._download_media_selector_url(
339 self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
340 except BBCCoUkIE.MediaSelectionError as e:
341 if e.id in ('notukerror
', 'geolocation
', 'selectionunavailable
'):
344 self._raise_extractor_error(e)
345 self._raise_extractor_error(last_exception)
347 def _download_media_selector_url(self, url, programme_id=None):
348 media_selection = self._download_json(
349 url, programme_id, 'Downloading media selection JSON
',
350 expected_status=(403, 404))
351 return self._process_media_selector(media_selection, programme_id)
353 def _process_media_selector(self, media_selection, programme_id):
358 for media in self._extract_medias(media_selection):
359 kind = media.get('kind
')
360 if kind in ('video
', 'audio
'):
361 bitrate = int_or_none(media.get('bitrate
'))
362 encoding = media.get('encoding
')
363 width = int_or_none(media.get('width
'))
364 height = int_or_none(media.get('height
'))
365 file_size = int_or_none(media.get('media_file_size
'))
366 for connection in self._extract_connections(media):
367 href = connection.get('href
')
372 conn_kind = connection.get('kind
')
373 protocol = connection.get('protocol
')
374 supplier = connection.get('supplier
')
375 transfer_format = connection.get('transferFormat
')
376 format_id = supplier or conn_kind or protocol
378 if supplier == 'asx
':
379 for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
382 'format_id
': 'ref
%s_%s' % (i, format_id),
384 elif transfer_format == 'dash
':
385 formats.extend(self._extract_mpd_formats(
386 href, programme_id, mpd_id=format_id, fatal=False))
387 elif transfer_format == 'hls
':
388 # TODO: let expected_status be passed into _extract_xxx_formats() instead
390 fmts = self._extract_m3u8_formats(
391 href, programme_id, ext='mp4
', entry_protocol='m3u8_native
',
392 m3u8_id=format_id, fatal=False)
393 except ExtractorError as e:
394 if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
395 and e.exc_info[1].code in (403, 404)):
399 elif transfer_format == 'hds
':
400 formats.extend(self._extract_f4m_formats(
401 href, programme_id, f4m_id=format_id, fatal=False))
403 if not supplier and bitrate:
404 format_id += '-%d' % bitrate
406 'format_id
': format_id,
407 'filesize
': file_size,
422 if protocol in ('http
', 'https
'):
427 elif protocol == 'rtmp
':
428 application = connection.get('application
', 'ondemand
')
429 auth_string = connection.get('authString
')
430 identifier = connection.get('identifier
')
431 server = connection.get('server
')
433 'url
': '%s://%s/%s?
%s' % (protocol, server, application, auth_string),
434 'play_path
': identifier,
435 'app
': '%s?
%s' % (application, auth_string),
436 'page_url
': 'http
://www
.bbc
.co
.uk
',
437 'player_url
': 'http
://www
.bbc
.co
.uk
/emp
/releases
/iplayer
/revisions
/617463_618125_4/617463_618125_4_emp
.swf
',
444 elif kind == 'captions
':
445 subtitles = self.extract_subtitles(media, programme_id)
446 return formats, subtitles
448 def _download_playlist(self, playlist_id):
450 playlist = self._download_json(
451 'http
://www
.bbc
.co
.uk
/programmes
/%s/playlist
.json
' % playlist_id,
452 playlist_id, 'Downloading playlist JSON
')
456 for version in playlist.get('allAvailableVersions
', []):
457 smp_config = version['smpConfig
']
458 title = smp_config['title
']
459 description = smp_config['summary
']
460 for item in smp_config['items
']:
462 if kind not in ('programme
', 'radioProgramme
'):
464 programme_id = item.get('vpid
')
465 duration = int_or_none(item.get('duration
'))
466 version_formats, version_subtitles = self._download_media_selector(programme_id)
467 types = version['types
']
468 for f in version_formats:
469 f['format_note
'] = ', '.join(types)
470 if any('AudioDescribed
' in x for x in types):
471 f['language_preference
'] = -10
472 formats += version_formats
473 for tag, subformats in (version_subtitles or {}).items():
474 subtitles.setdefault(tag, []).extend(subformats)
476 return programme_id, title, description, duration, formats, subtitles
477 except ExtractorError as ee:
478 if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
481 # fallback to legacy playlist
482 return self._process_legacy_playlist(playlist_id)
484 def _process_legacy_playlist_url(self, url, display_id):
485 playlist = self._download_legacy_playlist_url(url, display_id)
486 return self._extract_from_legacy_playlist(playlist, display_id)
488 def _process_legacy_playlist(self, playlist_id):
489 return self._process_legacy_playlist_url(
490 'http
://www
.bbc
.co
.uk
/iplayer
/playlist
/%s' % playlist_id, playlist_id)
492 def _download_legacy_playlist_url(self, url, playlist_id=None):
493 return self._download_xml(
494 url, playlist_id, 'Downloading legacy playlist XML
')
496 def _extract_from_legacy_playlist(self, playlist, playlist_id):
497 no_items = playlist.find('./{%s}noItems
' % self._EMP_PLAYLIST_NS)
498 if no_items is not None:
499 reason = no_items.get('reason
')
500 if reason == 'preAvailability
':
501 msg = 'Episode
%s is not yet available
' % playlist_id
502 elif reason == 'postAvailability
':
503 msg = 'Episode
%s is no longer available
' % playlist_id
504 elif reason == 'noMedia
':
505 msg = 'Episode
%s is not currently available
' % playlist_id
507 msg = 'Episode
%s is not available
: %s' % (playlist_id, reason)
508 raise ExtractorError(msg, expected=True)
510 for item in self._extract_items(playlist):
511 kind = item.get('kind
')
512 if kind not in ('programme
', 'radioProgramme
'):
514 title = playlist.find('./{%s}title
' % self._EMP_PLAYLIST_NS).text
515 description_el = playlist.find('./{%s}summary
' % self._EMP_PLAYLIST_NS)
516 description = description_el.text if description_el is not None else None
518 def get_programme_id(item):
519 def get_from_attributes(item):
520 for p in ('identifier
', 'group
'):
522 if value and re.match(r'^
[pb
][\da
-z
]{7}$
', value):
524 get_from_attributes(item)
525 mediator = item.find('./{%s}mediator
' % self._EMP_PLAYLIST_NS)
526 if mediator is not None:
527 return get_from_attributes(mediator)
529 programme_id = get_programme_id(item)
530 duration = int_or_none(item.get('duration
'))
533 formats, subtitles = self._download_media_selector(programme_id)
535 formats, subtitles = self._process_media_selector(item, playlist_id)
536 programme_id = playlist_id
538 return programme_id, title, description, duration, formats, subtitles
540 def _real_extract(self, url):
541 group_id = self._match_id(url)
543 webpage = self._download_webpage(url, group_id, 'Downloading video page
')
545 error = self._search_regex(
546 r'<div
\b[^
>]+\bclass
=["\'](?:smp|playout)__message delta["\'][^
>]*>\s
*([^
<]+?
)\s
*<',
547 webpage, 'error
', default=None)
549 raise ExtractorError(error, expected=True)
554 tviplayer = self._search_regex(
555 r'mediator\
.bind\
(({.+?}
)\s
*,\s
*document\
.getElementById
',
556 webpage, 'player
', default=None)
559 player = self._parse_json(tviplayer, group_id).get('player
', {})
560 duration = int_or_none(player.get('duration
'))
561 programme_id = player.get('vpid
')
564 programme_id = self._search_regex(
565 r'"vpid"\s
*:\s
*"(%s)"' % self._ID_REGEX, webpage, 'vpid
', fatal=False, default=None)
568 formats, subtitles = self._download_media_selector(programme_id)
569 title = self._og_search_title(webpage, default=None) or self._html_search_regex(
570 (r'<h2
[^
>]+id="parent-title"[^
>]*>(.+?
)</h2
>',
571 r'<div
[^
>]+class="info"[^
>]*>\s
*<h1
>(.+?
)</h1
>'), webpage, 'title
')
572 description = self._search_regex(
573 (r'<p
class="[^"]*medium
-description
[^
"]*">([^
<]+)</p
>',
574 r'<div
[^
>]+class="info_+synopsis"[^
>]*>([^
<]+)</div
>'),
575 webpage, 'description
', default=None)
577 description = self._html_search_meta('description
', webpage)
579 programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
581 self._sort_formats(formats)
586 'description
': description,
587 'thumbnail
': self._og_search_thumbnail(webpage, default=None),
588 'duration
': duration,
590 'subtitles
': subtitles,
594 class BBCIE(BBCCoUkIE):
597 _VALID_URL = r'https?
://(?
:www\
.)?bbc\
.(?
:com|co\
.uk
)/(?
:[^
/]+/)+(?P
<id>[^
/#?]+)'
601 'mobile-tablet-main',
605 # article with multiple videos embedded with data-playable containing vpids
606 'url': 'http://www.bbc.com/news/world-europe-32668511',
608 'id': 'world-europe-32668511',
609 'title': 'Russia stages massive WW2 parade',
610 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
614 # article with multiple videos embedded with data-playable (more videos)
615 'url': 'http://www.bbc.com/news/business-28299555',
617 'id': 'business-28299555',
618 'title': 'Farnborough Airshow: Video highlights',
619 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
624 # article with multiple videos embedded with `new SMP()`
626 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
628 'id': '3662a707-0af9-3149-963f-47bea720b460',
631 'playlist_count': 18,
633 # single video embedded with data-playable containing vpid
634 'url': 'http://www.bbc.com/news/world-europe-32041533',
638 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
639 'description': 'md5:2868290467291b37feda7863f7a83f54',
641 'timestamp': 1427219242,
642 'upload_date': '20150324',
646 'skip_download': True,
649 # article with single video embedded with data-playable containing XML playlist
650 # with direct video links as progressiveDownloadUrl (for now these are extracted)
651 # and playlist with f4m and m3u8 as streamingUrl
652 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
654 'id': '150615_telabyad_kentin_cogu',
656 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
657 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
658 'timestamp': 1434397334,
659 'upload_date': '20150615',
662 'skip_download': True,
665 # single video embedded with data-playable containing XML playlists (regional section)
666 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
668 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
670 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
671 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
672 'timestamp': 1434713142,
673 'upload_date': '20150619',
676 'skip_download': True,
679 # single video from video playlist embedded with vxp-playlist-data JSON
680 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
684 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
686 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
689 'skip_download': True,
692 # single video story with digitalData
693 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
697 'title': 'Sri Lanka’s spicy secret',
698 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
699 'timestamp': 1437674293,
700 'upload_date': '20150723',
704 'skip_download': True,
707 # single video story without digitalData
708 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
712 'title': 'Hyundai Santa Fe Sport: Rock star',
713 'description': 'md5:b042a26142c4154a6e472933cf20793d',
714 'timestamp': 1415867444,
715 'upload_date': '20141113',
719 'skip_download': True,
722 # single video embedded with Morph
723 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
727 'title': "Nigeria v Japan - Men's First Round",
728 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
730 'uploader': 'BBC Sport',
731 'uploader_id': 'bbc_sport',
735 'skip_download': True,
737 'skip': 'Georestricted to UK',
739 # single video with playlist.sxml URL in playlist param
740 'url': 'http://www.bbc.com/sport/0/football/33653409',
744 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
745 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
750 'skip_download': True,
753 # article with multiple videos embedded with playlist.sxml in playlist param
754 'url': 'http://www.bbc.com/sport/0/football/34475836',
757 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
758 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
762 # school report article with single video
763 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
766 'title': 'School which breaks down barriers in Jerusalem',
770 # single video with playlist URL from weather section
771 'url': 'http://www.bbc.com/weather/features/33601775',
772 'only_matching': True,
774 # custom redirection to www.bbc.com
775 # also, video with window.__INITIAL_DATA__
776 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
780 'title': "Pluto may have 'nitrogen glaciers'",
781 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
782 'thumbnail': r
're:https?://.+/.+\.jpg',
783 'timestamp': 1437785037,
784 'upload_date': '20150725',
787 # video with window.__INITIAL_DATA__ and value as JSON string
788 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
792 'title': 'Why France is making this woman a national hero',
793 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
794 'thumbnail': r
're:https?://.+/.+\.jpg',
795 'timestamp': 1638230731,
796 'upload_date': '20211130',
799 # single video article embedded with data-media-vpid
800 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
801 'only_matching': True,
804 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
808 'title': 'Things Not To Say to people that live on council estates',
809 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
811 'thumbnail': r
're:https?://.+/.+\.jpg',
814 # window.__PRELOADED_STATE__
815 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
819 'title': 'Prom 6: An American in Paris and Turangalila',
820 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
821 'uploader': 'Radio 3',
822 'uploader_id': 'bbc_radio_three',
825 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
829 'title': 'md5:2fabf12a726603193a2879a055f72514',
830 'description': 'Learn English words and phrases from this story',
832 'add_ie': [BBCCoUkIE
.ie_key()],
835 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
839 'title': 'How positive thinking is harming your happiness',
840 'alt_title': 'The downsides of positive thinking',
841 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
843 'thumbnail': r
're:https?://.+/p07c9dsr.jpg',
844 'upload_date': '20190604',
845 'categories': ['Psychology'],
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Returns ``False`` as soon as any of the listed, more specific BBC
    extractor classes reports the URL as suitable; otherwise defers to the
    parent class's ``suitable``.

    NOTE(review): takes ``cls``, so this is presumably decorated with
    ``@classmethod`` on the line above this block -- confirm.
    """
    excluded_extractors = (
        BBCCoUkIE,
        BBCCoUkArticleIE,
        BBCCoUkIPlayerEpisodesIE,
        BBCCoUkIPlayerGroupIE,
        BBCCoUkPlaylistIE,
    )
    for extractor in excluded_extractors:
        if extractor.suitable(url):
            return False
    return super(BBCIE, cls).suitable(url)
855 def _extract_from_media_meta(self
, media_meta
, video_id
):
856 # Direct links to media in media metadata (e.g.
857 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
858 # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
859 source_files
= media_meta
.get('sourceFiles')
863 'format_id': format_id
,
864 'ext': f
.get('encoding'),
865 'tbr': float_or_none(f
.get('bitrate'), 1000),
866 'filesize': int_or_none(f
.get('filesize')),
867 } for format_id
, f
in source_files
.items() if f
.get('url')], []
869 programme_id
= media_meta
.get('externalId')
871 return self
._download
_media
_selector
(programme_id
)
873 # Process playlist.sxml as legacy playlist
874 href
= media_meta
.get('href')
876 playlist
= self
._download
_legacy
_playlist
_url
(href
)
877 _
, _
, _
, _
, formats
, subtitles
= self
._extract
_from
_legacy
_playlist
(playlist
, video_id
)
878 return formats
, subtitles
882 def _extract_from_playlist_sxml(self
, url
, playlist_id
, timestamp
):
883 programme_id
, title
, description
, duration
, formats
, subtitles
= \
884 self
._process
_legacy
_playlist
_url
(url
, playlist_id
)
885 self
._sort
_formats
(formats
)
889 'description': description
,
890 'duration': duration
,
891 'timestamp': timestamp
,
893 'subtitles': subtitles
,
896 def _real_extract(self
, url
):
897 playlist_id
= self
._match
_id
(url
)
899 webpage
= self
._download
_webpage
(url
, playlist_id
)
901 json_ld_info
= self
._search
_json
_ld
(webpage
, playlist_id
, default
={})
902 timestamp
= json_ld_info
.get('timestamp')
904 playlist_title
= json_ld_info
.get('title')
905 if not playlist_title
:
906 playlist_title
= (self
._og
_search
_title
(webpage
, default
=None)
907 or self
._html
_extract
_title
(webpage
, 'playlist title', default
=None))
909 playlist_title
= re
.sub(r
'(.+)\s*-\s*BBC.*?$', r
'\1', playlist_title
).strip()
911 playlist_description
= json_ld_info
.get(
912 'description') or self
._og
_search
_description
(webpage
, default
=None)
915 timestamp
= parse_iso8601(self
._search
_regex
(
916 [r
'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
917 r
'itemprop="datePublished"[^>]+datetime="([^"]+)"',
918 r
'"datePublished":\s*"([^"]+)'],
919 webpage
, 'date', default
=None))
923 # article with multiple videos embedded with playlist.sxml (e.g.
924 # http://www.bbc.com/sport/0/football/34475836)
925 playlists
= re
.findall(r
'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage
)
926 playlists
.extend(re
.findall(r
'data-media-id="([^"]+/playlist\.sxml)"', webpage
))
929 self
._extract
_from
_playlist
_sxml
(playlist_url
, playlist_id
, timestamp
)
930 for playlist_url
in playlists
]
932 # news article with multiple videos embedded with data-playable
933 data_playables
= re
.findall(r
'data-playable=(["\'])({.+?}
)\
1', webpage)
935 for _, data_playable_json in data_playables:
936 data_playable = self._parse_json(
937 unescapeHTML(data_playable_json), playlist_id, fatal=False)
938 if not data_playable:
940 settings = data_playable.get('settings
', {})
942 # data-playable with video vpid in settings.playlistObject.items (e.g.
943 # http://www.bbc.com/news/world-us-canada-34473351)
944 playlist_object = settings.get('playlistObject
', {})
946 items = playlist_object.get('items
')
947 if items and isinstance(items, list):
948 title = playlist_object['title
']
949 description = playlist_object.get('summary
')
950 duration = int_or_none(items[0].get('duration
'))
951 programme_id = items[0].get('vpid
')
952 formats, subtitles = self._download_media_selector(programme_id)
953 self._sort_formats(formats)
957 'description
': description,
958 'timestamp
': timestamp,
959 'duration
': duration,
961 'subtitles
': subtitles,
# data-playable without vpid but with playlist.sxml URLs
# in otherSettings.playlist (e.g.
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
967 playlist = data_playable.get('otherSettings
', {}).get('playlist', {})
970 for key in ('streaming
', 'progressiveDownload
'):
971 playlist_url = playlist.get('%sUrl
' % key)
975 info = self._extract_from_playlist_sxml(
976 playlist_url, playlist_id, timestamp)
980 entry['title
'] = info['title
']
981 entry['formats
'].extend(info['formats
'])
982 except ExtractorError as e:
983 # Some playlist URL may fail with 500, at the same time
984 # the other one may work fine (e.g.
985 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
986 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
990 self._sort_formats(entry['formats
'])
991 entries.append(entry)
994 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
996 # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
997 group_id = self._search_regex(
998 r'<div
[^
>]+\bclass
=["\']video["\'][^
>]+\bdata
-pid
=["\'](%s)' % self._ID_REGEX,
999 webpage, 'group id', default=None)
1001 return self.url_result(
1002 'https://www.bbc.co.uk/programmes/%s' % group_id,
1003 ie=BBCCoUkIE.ie_key())
1005 # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
1006 programme_id = self._search_regex(
1007 [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
1008 r'<param[^>]+name="externalIdentifier
"[^>]+value="(%s)"' % self._ID_REGEX,
1009 r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
1010 webpage, 'vpid', default=None)
1013 formats, subtitles = self._download_media_selector(programme_id)
1014 self._sort_formats(formats)
1015 # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
1016 digital_data = self._parse_json(
1018 r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
1019 programme_id, fatal=False)
1020 page_info = digital_data.get('page', {}).get('pageInfo', {})
1021 title = page_info.get('pageName') or self._og_search_title(webpage)
1022 description = page_info.get('description') or self._og_search_description(webpage)
1023 timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
1027 'description': description,
1028 'timestamp': timestamp,
1030 'subtitles': subtitles,
1033 # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
1034 initial_data = self._parse_json(self._html_search_regex(
1035 r'<script[^>]+id=(["\'])initial
-data\
1[^
>]+data
-json
=(["\'])(?P<json>(?:(?!\2).)+)',
1036 webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
1038 init_data = try_get(
1039 initial_data, lambda x: x['initData']['items'][0], dict) or {}
1040 smp_data = init_data.get('smpData') or {}
1041 clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
1042 version_id = clip_data.get('versionID')
1044 title = smp_data['title']
1045 formats, subtitles = self._download_media_selector(version_id)
1046 self._sort_formats(formats)
1047 image_url = smp_data.get('holdingImageURL')
1048 display_date = init_data.get('displayDate')
1049 topic_title = init_data.get('topicTitle')
1055 'alt_title': init_data.get('shortTitle'),
1056 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
1057 'description': smp_data.get('summary') or init_data.get('shortSummary'),
1058 'upload_date': display_date.replace('-', '') if display_date else None,
1059 'subtitles': subtitles,
1060 'duration': int_or_none(clip_data.get('duration')),
1061 'categories': [topic_title] if topic_title else None,
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
# Several setPayload calls may be present, but the video always seems to be
# related to the first one
1067 morph_payload = self._parse_json(
1069 r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
1070 webpage, 'morph payload', default='{}'),
1071 playlist_id, fatal=False)
1073 components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
1074 for component in components:
1075 if not isinstance(component, dict):
1077 lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
1080 identifiers = lead_media.get('identifiers')
1081 if not identifiers or not isinstance(identifiers, dict):
1083 programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
1084 if not programme_id:
1086 title = lead_media.get('title') or self._og_search_title(webpage)
1087 formats, subtitles = self._download_media_selector(programme_id)
1088 self._sort_formats(formats)
1089 description = lead_media.get('summary')
1090 uploader = lead_media.get('masterBrand')
1091 uploader_id = lead_media.get('mid')
1093 duration_d = lead_media.get('duration')
1094 if isinstance(duration_d, dict):
1095 duration = parse_duration(dict_get(
1096 duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
1100 'description': description,
1101 'duration': duration,
1102 'uploader': uploader,
1103 'uploader_id': uploader_id,
1105 'subtitles': subtitles,
1108 preload_state = self._parse_json(self._search_regex(
1109 r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
1110 'preload state', default='{}'), playlist_id, fatal=False)
1112 current_programme = preload_state.get('programmes', {}).get('current') or {}
1113 programme_id = current_programme.get('id')
1114 if current_programme and programme_id and current_programme.get('type') == 'playable_item':
1115 title = current_programme.get('titles', {}).get('tertiary') or playlist_title
1116 formats, subtitles = self._download_media_selector(programme_id)
1117 self._sort_formats(formats)
1118 synopses = current_programme.get('synopses') or {}
1119 network = current_programme.get('network') or {}
1120 duration = int_or_none(
1121 current_programme.get('duration', {}).get('value'))
1123 image_url = current_programme.get('image_url')
1125 thumbnail = image_url.replace('{recipe}', 'raw')
1129 'description': dict_get(synopses, ('long', 'medium', 'short')),
1130 'thumbnail': thumbnail,
1131 'duration': duration,
1132 'uploader': network.get('short_title'),
1133 'uploader_id': network.get('id'),
1135 'subtitles': subtitles,
1138 bbc3_config = self._parse_json(
1140 r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
1141 'bbcthree config', default='{}'),
1142 playlist_id, transform_source=js_to_json, fatal=False) or {}
1143 payload = bbc3_config.get('payload') or {}
1145 clip = payload.get('currentClip') or {}
1146 clip_vpid = clip.get('vpid')
1147 clip_title = clip.get('title')
1148 if clip_vpid and clip_title:
1149 formats, subtitles = self._download_media_selector(clip_vpid)
1150 self._sort_formats(formats)
1153 'title': clip_title,
1154 'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
1155 'description': clip.get('description'),
1156 'duration': parse_duration(clip.get('duration')),
1158 'subtitles': subtitles,
1160 bbc3_playlist = try_get(
1161 payload, lambda x: x['content']['bbcMedia']['playlist'],
1164 playlist_title = bbc3_playlist.get('title') or playlist_title
1165 thumbnail = bbc3_playlist.get('holdingImageURL')
1167 for bbc3_item in bbc3_playlist['items']:
1168 programme_id = bbc3_item.get('versionID')
1169 if not programme_id:
1171 formats, subtitles = self._download_media_selector(programme_id)
1172 self._sort_formats(formats)
1175 'title': playlist_title,
1176 'thumbnail': thumbnail,
1177 'timestamp': timestamp,
1179 'subtitles': subtitles,
1181 return self.playlist_result(
1182 entries, playlist_id, playlist_title, playlist_description)
1184 initial_data = self._search_regex(
1185 r'window\.__INITIAL_DATA__\s*=\s*("{.+?}
")\s*;', webpage,
1186 'quoted preload state', default=None)
1187 if initial_data is None:
1188 initial_data = self._search_regex(
1189 r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
1190 'preload state', default={})
1192 initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
1193 initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
1195 def parse_media(media):
1198 for item in (try_get(media, lambda x: x['media']['items'], list) or []):
1199 item_id = item.get('id')
1200 item_title = item.get('title')
1201 if not (item_id and item_title):
1203 formats, subtitles = self._download_media_selector(item_id)
1204 self._sort_formats(formats)
1206 blocks = try_get(media, lambda x: x['summary']['blocks'], list)
1209 for block in blocks:
1210 text = try_get(block, lambda x: x['model']['text'], compat_str)
1212 summary.append(text)
1214 item_desc = '\n\n'.join(summary)
1216 for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
1217 if try_get(meta, lambda x: x['label']) == 'Published':
1218 item_time = unified_timestamp(meta.get('timestamp'))
1222 'title': item_title,
1223 'thumbnail': item.get('holdingImageUrl'),
1225 'subtitles': subtitles,
1226 'timestamp': item_time,
1227 'description': strip_or_none(item_desc),
1229 for resp in (initial_data.get('data') or {}).values():
1230 name = resp.get('name')
1231 if name == 'media-experience':
1232 parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
1233 elif name == 'article':
1234 for block in (try_get(resp,
1235 (lambda x: x['data']['blocks'],
1236 lambda x: x['data']['content']['model']['blocks'],),
1238 if block.get('type') != 'media':
1240 parse_media(block.get('model'))
1241 return self.playlist_result(
1242 entries, playlist_id, playlist_title, playlist_description)
1244 def extract_all(pattern):
1245 return list(filter(None, map(
1246 lambda s: self._parse_json(s, playlist_id, fatal=False),
1247 re.findall(pattern, webpage))))
1249 # Multiple video article (e.g.
1250 # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
1251 EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?
' % self._ID_REGEX
1253 for match in extract_all(r'new\s
+SMP\
(({.+?}
)\
)'):
1254 embed_url = match.get('playerSettings
', {}).get('externalEmbedUrl
')
1255 if embed_url and re.match(EMBED_URL, embed_url):
1256 entries.append(embed_url)
1257 entries.extend(re.findall(
1258 r'setPlaylist\
("(%s)"\
)' % EMBED_URL, webpage))
1260 return self.playlist_result(
1261 [self.url_result(entry_, 'BBCCoUk
') for entry_ in entries],
1262 playlist_id, playlist_title, playlist_description)
1264 # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
1265 medias = extract_all(r"data-media-meta='({[^']+}
)'")
1268 # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
1269 media_asset = self._search_regex(
1270 r'mediaAssetPage\
.init\
(\s
*({.+?}
), "/',
1271 webpage, 'media asset', default=None)
1273 media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
1275 for video in media_asset_page.get('videos', {}).values():
1276 medias.extend(video.values())
1279 # Multiple video playlist with single `now playing` entry (e.g.
1280 # http://www.bbc.com/news/video_and_audio/must_see/33767813)
1281 vxp_playlist = self._parse_json(
1283 r'<script[^>]+class="vxp
-playlist
-data
"[^>]+type="application
/json
"[^>]*>([^<]+)</script>',
1284 webpage, 'playlist data'),
1286 playlist_medias = []
1287 for item in vxp_playlist:
1288 media = item.get('media')
1291 playlist_medias.append(media)
1292 # Download single video if found media with asset id matching the video id from URL
1293 if item.get('advert', {}).get('assetId') == playlist_id:
1296 # Fallback to the whole playlist
1298 medias = playlist_medias
1301 for num, media_meta in enumerate(medias, start=1):
1302 formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
1303 if not formats and not self.get_param('ignore_no_formats'):
1305 self._sort_formats(formats)
1307 video_id = media_meta.get('externalId')
1309 video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
1311 title = media_meta.get('caption')
1313 title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
1315 duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
1318 for image in media_meta.get('images', {}).values():
1319 images.extend(image.values())
1320 if 'image' in media_meta:
1321 images.append(media_meta['image'])
1324 'url': image.get('href'),
1325 'width': int_or_none(image.get('width')),
1326 'height': int_or_none(image.get('height')),
1327 } for image in images]
1332 'thumbnails': thumbnails,
1333 'duration': duration,
1334 'timestamp': timestamp,
1336 'subtitles': subtitles,
1339 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
class BBCCoUkArticleIE(InfoExtractor):
    """Extractor for bbc.co.uk programme articles.

    An article page is turned into a playlist whose entries are the clip
    URLs referenced by the ``<div typeof="Clip" resource="...">`` elements
    found in the page markup.
    """

    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Download the article page and return its clips as a playlist."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        # Guard against a page without og:description so that .strip() is
        # never called on a missing value.
        if description:
            description = description.strip()

        clip_urls = re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)
        entries = [self.url_result(clip_url) for clip_url in clip_urls]

        return self.playlist_result(entries, playlist_id, title, description)
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    """Shared logic for paginated bbc.co.uk programme listings.

    This class references ``_URL_TEMPLATE``, ``_VIDEO_ID_TEMPLATE`` and
    ``_extract_title_and_description()``, which concrete subclasses are
    expected to provide.
    """

    def _entries(self, webpage, url, playlist_id):
        """Yield a url result for every programme id found in the listing.

        Starts from the already downloaded ``webpage`` and keeps following
        the "next" pagination link. When the requested URL carries an
        explicit ``page`` query parameter only that page is processed.
        """
        single_page = 'page' in compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query)
        video_id_re = self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX
        # The first page is already in hand, so downloads start at page 2.
        for page_num in itertools.count(2):
            for video_id in re.findall(video_id_re, webpage):
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
            # An explicitly requested page must not spill into later pages.
            if single_page:
                return
            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            # No pagination link means the last page has been consumed.
            if not next_page:
                break
            # NOTE(review): the fourth positional argument is page_num, as in
            # the original call; confirm which parameter it is meant to fill.
            webpage = self._download_webpage(
                compat_urlparse.urljoin(url, next_page), playlist_id,
                'Downloading page %d' % page_num, page_num)

    def _real_extract(self, url):
        """Return a playlist built lazily from the listing's pages."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title, description = self._extract_title_and_description(webpage)

        return self.playlist_result(
            self._entries(webpage, url, playlist_id),
            playlist_id, title, description)
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    """Shared logic for iPlayer playlists (episode lists and groups).

    The methods below rely on the following members, which concrete
    subclasses are expected to provide: ``_PAGE_SIZE``, ``_DESCRIPTION_KEY``,
    ``_call_api()``, ``_get_elements()``, ``_get_episode()``,
    ``_get_episode_image()``, ``_get_episode_field()``,
    ``_get_playlist_data()`` and ``_get_playlist_title()``.
    """

    # '%%s' survives the formatting below as '%s', leaving one placeholder
    # for the URL path segment that each subclass fills in.
    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Return ``episode[key][default_key]`` via ``try_get``."""
        return try_get(episode, lambda x: x[key][default_key])

    def _get_description(self, data):
        """Return the first available of the large/medium/small synopsis."""
        synopsis = data.get(self._DESCRIPTION_KEY) or {}
        return dict_get(synopsis, ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield url-type entries for one zero-based ``page`` of episodes."""
        # The API is called with page + 1 because ``page`` here is zero-based.
        elements = self._get_elements(self._call_api(
            programme_id, per_page, page + 1, series_id))
        for element in elements:
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            # Without an id no episode URL can be built.
            if not episode_id:
                continue
            thumbnail = None
            image = self._get_episode_image(episode)
            if image:
                thumbnail = image.replace('{recipe}', 'raw')
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': thumbnail,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the playlist for ``url``, honouring seriesId/page params."""
        pid = self._match_id(url)
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        series_id = qs.get('seriesId', [None])[0]
        page = qs.get('page', [None])[0]
        # NOTE(review): 36 is presumably the page size the website itself
        # uses for explicit ?page=N URLs - confirm.
        per_page = 36 if page else self._PAGE_SIZE
        fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
        # An explicit page yields just that page; otherwise pages are fetched
        # on demand.
        entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        # A one-item request is made only to obtain the playlist metadata.
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        return self.playlist_result(
            entries, pid, self._get_playlist_title(playlist_data),
            self._get_description(playlist_data))
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    """Extractor for iPlayer ``/iplayer/episodes/<pid>`` listings."""

    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    # NOTE(review): the base class reads self._PAGE_SIZE; the value below is
    # reconstructed and should be confirmed against the upstream extractor.
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the episode's default image template, if any."""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the ``default`` variant of ``field`` for the episode."""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the list of result elements from an API response."""
        return data['entities']['results']

    @staticmethod
    def _get_episode(element):
        """Return the episode dict nested in ``element`` (or an empty dict)."""
        return element.get('episode') or {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST a query to the iPlayer graph API and return its programme data.

        ``series_id``, when given, is sent as the ``sliceId`` variable.
        """
        # NOTE(review): the 'id' and 'page' keys are reconstructed from the
        # method's parameters - confirm against the upstream extractor.
        variables = {
            'id': pid,
            'page': page,
            'perPage': per_page,
        }
        if series_id:
            variables['sliceId'] = series_id
        return self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid, headers={
                'Content-Type': 'application/json'
            }, data=json.dumps({
                'id': '5692d93d5aac8d796a0305e895e61551',
                'variables': variables,
            }).encode('utf-8'))['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """Return ``data`` unchanged; it is used directly as playlist metadata."""
        return data

    def _get_playlist_title(self, data):
        """Return the ``default`` variant of the playlist title."""
        return self._get_default(data, 'title')
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    """Extractor for iPlayer ``/iplayer/group/<pid>`` listings."""

    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    # NOTE(review): the base class reads self._PAGE_SIZE; the value below is
    # reconstructed and should be confirmed against the upstream extractor.
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the episode's ``standard`` image template, if any."""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Return ``field`` directly from the episode dict."""
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        """Return the list of elements from an API response."""
        return data['elements']

    @staticmethod
    def _get_episode(element):
        """Return ``element`` itself; it is used directly as the episode."""
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of a group's episodes from the ibl API.

        ``series_id`` is accepted for signature parity with the sibling
        extractor and is not used here.
        """
        # NOTE(review): the 'page' query key is reconstructed from the
        # method's parameters - confirm against the upstream extractor.
        return self._download_json(
            'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
            pid, query={
                'page': page,
                'per_page': per_page,
            })['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Return the ``group`` sub-dict holding the playlist metadata."""
        return data['group']

    def _get_playlist_title(self, data):
        """Return the playlist title."""
        return data.get('title')
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    """Extractor for programme episodes/broadcasts/clips listing pages."""

    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    # Filled with a programme id to build each entry's URL.
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    # Filled with the programme id regex to locate ids in the page markup.
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return the ``(title, description)`` pair from the Open Graph tags."""
        title = self._og_search_title(webpage, fatal=False)
        description = self._og_search_description(webpage)
        return title, description