6 import xml
.etree
.ElementTree
8 from .common
import InfoExtractor
9 from ..compat
import compat_HTTPError
, compat_str
, compat_urlparse
32 class BBCCoUkIE(InfoExtractor
):
34 IE_DESC
= 'BBC iPlayer'
35 _ID_REGEX
= r
'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
38 (?:www\.)?bbc\.co\.uk/
40 programmes/(?!articles/)|
41 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
42 music/(?:clips|audiovideo/popular)[/#]|
45 events/[^/]+/play/[^/]+/
47 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
50 _LOGIN_URL
= 'https://account.bbc.com/signin'
51 _NETRC_MACHINE
= 'bbc'
53 _MEDIA_SELECTOR_URL_TEMPL
= 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
        # Provides HQ HLS streams with even better quality than the pc mediaset, but fails
        # with geolocation in some cases even when the programme is not geo-restricted at all
        # (e.g. http://www.bbc.co.uk/programmes/b06bp7lf). It may also fail with selectionunavailable.
62 _EMP_PLAYLIST_NS
= 'http://bbc.co.uk/2008/emp/playlist'
66 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
70 'title': 'Kaleidoscope, Leonard Cohen',
71 'description': 'The Canadian poet and songwriter reflects on his musical career.',
75 'skip_download': True,
79 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
83 'title': 'The Man in Black: Series 3: The Printed Name',
84 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
89 'skip_download': True,
91 'skip': 'Episode is no longer available on BBC iPlayer Radio',
94 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
98 'title': 'The Voice UK: Series 3: Blind Auditions 5',
99 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
104 'skip_download': True,
106 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
109 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
113 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
114 'description': '2. Invasion',
119 'skip_download': True,
121 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
123 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
127 'title': 'Pete Tong, The Essential New Tune Special',
128 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
133 'skip_download': True,
135 'skip': 'Episode is no longer available on BBC iPlayer Radio',
137 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
142 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
143 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
148 'skip_download': True,
151 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
156 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
157 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
162 'skip_download': True,
165 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
169 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
170 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
175 'skip_download': True,
177 'skip': 'geolocation',
179 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
183 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
184 'title': 'Royal Academy Summer Exhibition',
189 'skip_download': True,
191 'skip': 'geolocation',
193 # iptv-all mediaset fails with geolocation however there is no geo restriction
194 # for this programme at all
195 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
199 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
200 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
204 'skip_download': True,
206 'skip': 'Now it\'s really geo-restricted',
208 # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
209 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
213 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
214 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
218 'skip_download': True,
221 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
226 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
227 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
232 'skip_download': True,
235 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
236 'only_matching': True,
238 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
239 'only_matching': True,
241 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
242 'only_matching': True,
244 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
245 'only_matching': True,
247 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
248 'only_matching': True,
250 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
251 'only_matching': True,
253 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
254 'only_matching': True,
256 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
257 'only_matching': True,
def _perform_login(self, username, password):
    """Sign in to the BBC account service with the given credentials.

    Downloads the sign-in page, submits its hidden form inputs together
    with *username* and *password* to the form's action URL, and inspects
    the URL the response was finally served from.

    Raises:
        ExtractorError: if the response still comes from the sign-in URL.
            When the page carries a ``form-message`` element its text is
            included in the message and the error is marked as expected.
    """
    signin_url = self._LOGIN_URL
    login_page = self._download_webpage(
        signin_url, None, 'Downloading signin page')

    # Start from the hidden inputs of the sign-in page and add the credentials.
    login_form = self._hidden_inputs(login_page)
    login_form.update({
        'username': username,
        'password': password,
    })

    # The form action may be relative; fall back to the sign-in URL itself.
    form_action = self._search_regex(
        r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
        'post url', default=signin_url, group='url')
    post_url = urljoin(signin_url, form_action)

    response, urlh = self._download_webpage_handle(
        post_url, None, 'Logging in', data=urlencode_postdata(login_form),
        headers={'Referer': signin_url})

    # Nothing further is checked once we have left the sign-in URL.
    if signin_url not in urlh.geturl():
        return

    error = clean_html(get_element_by_class('form-message', response))
    if error:
        raise ExtractorError(
            'Unable to login: %s' % error, expected=True)
    raise ExtractorError('Unable to log in')
class MediaSelectionError(Exception):
    """Signals that a media selection response carried an error result."""

    def __init__(self, id):
        # Error code taken from the response; handlers read it back as `.id`.
        self.id = id
290 def _extract_asx_playlist(self, connection, programme_id):
291 asx = self._download_xml(connection.get('href
'), programme_id, 'Downloading ASX playlist
')
292 return [ref.get('href
') for ref in asx.findall('./Entry
/ref
')]
294 def _extract_items(self, playlist):
295 return playlist.findall('./{%s}item
' % self._EMP_PLAYLIST_NS)
297 def _extract_medias(self, media_selection):
298 error = media_selection.get('result
')
300 raise BBCCoUkIE.MediaSelectionError(error)
301 return media_selection.get('media
') or []
303 def _extract_connections(self, media):
304 return media.get('connection
') or []
306 def _get_subtitles(self, media, programme_id):
308 for connection in self._extract_connections(media):
309 cc_url = url_or_none(connection.get('href
'))
312 captions = self._download_xml(
313 cc_url, programme_id, 'Downloading captions
', fatal=False)
314 if not isinstance(captions, xml.etree.ElementTree.Element):
318 'url
': connection.get('href
'),
def _raise_extractor_error(self, media_selection_error):
    """Raise an ExtractorError describing *media_selection_error*.

    The message combines this extractor's IE_NAME with the error's ``id``.
    """
    message = '%s returned error: %s' % (self.IE_NAME, media_selection_error.id)
    # NOTE(review): the final argument of this call is not visible in the
    # extract under review; `expected=True` follows the other ExtractorError
    # calls in this class - confirm against the full file.
    raise ExtractorError(message, expected=True)
330 def _download_media_selector(self, programme_id):
331 last_exception = None
332 for media_set in self._MEDIA_SETS:
334 return self._download_media_selector_url(
335 self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
336 except BBCCoUkIE.MediaSelectionError as e:
337 if e.id in ('notukerror
', 'geolocation
', 'selectionunavailable
'):
340 self._raise_extractor_error(e)
341 self._raise_extractor_error(last_exception)
343 def _download_media_selector_url(self, url, programme_id=None):
344 media_selection = self._download_json(
345 url, programme_id, 'Downloading media selection JSON
',
346 expected_status=(403, 404))
347 return self._process_media_selector(media_selection, programme_id)
349 def _process_media_selector(self, media_selection, programme_id):
354 for media in self._extract_medias(media_selection):
355 kind = media.get('kind
')
356 if kind in ('video
', 'audio
'):
357 bitrate = int_or_none(media.get('bitrate
'))
358 encoding = media.get('encoding
')
359 width = int_or_none(media.get('width
'))
360 height = int_or_none(media.get('height
'))
361 file_size = int_or_none(media.get('media_file_size
'))
362 for connection in self._extract_connections(media):
363 href = connection.get('href
')
368 conn_kind = connection.get('kind
')
369 protocol = connection.get('protocol
')
370 supplier = connection.get('supplier
')
371 transfer_format = connection.get('transferFormat
')
372 format_id = supplier or conn_kind or protocol
374 if supplier == 'asx
':
375 for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
378 'format_id
': 'ref
%s_%s' % (i, format_id),
380 elif transfer_format == 'dash
':
381 formats.extend(self._extract_mpd_formats(
382 href, programme_id, mpd_id=format_id, fatal=False))
383 elif transfer_format == 'hls
':
384 # TODO: let expected_status be passed into _extract_xxx_formats() instead
386 fmts = self._extract_m3u8_formats(
387 href, programme_id, ext='mp4
', entry_protocol='m3u8_native
',
388 m3u8_id=format_id, fatal=False)
389 except ExtractorError as e:
390 if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
391 and e.exc_info[1].code in (403, 404)):
395 elif transfer_format == 'hds
':
396 formats.extend(self._extract_f4m_formats(
397 href, programme_id, f4m_id=format_id, fatal=False))
399 if not supplier and bitrate:
400 format_id += '-%d' % bitrate
402 'format_id
': format_id,
403 'filesize
': file_size,
418 if protocol in ('http
', 'https
'):
423 elif protocol == 'rtmp
':
424 application = connection.get('application
', 'ondemand
')
425 auth_string = connection.get('authString
')
426 identifier = connection.get('identifier
')
427 server = connection.get('server
')
429 'url
': '%s://%s/%s?
%s' % (protocol, server, application, auth_string),
430 'play_path
': identifier,
431 'app
': '%s?
%s' % (application, auth_string),
432 'page_url
': 'http
://www
.bbc
.co
.uk
',
433 'player_url
': 'http
://www
.bbc
.co
.uk
/emp
/releases
/iplayer
/revisions
/617463_618125_4/617463_618125_4_emp
.swf
',
440 elif kind == 'captions
':
441 subtitles = self.extract_subtitles(media, programme_id)
442 return formats, subtitles
444 def _download_playlist(self, playlist_id):
446 playlist = self._download_json(
447 'http
://www
.bbc
.co
.uk
/programmes
/%s/playlist
.json
' % playlist_id,
448 playlist_id, 'Downloading playlist JSON
')
452 for version in playlist.get('allAvailableVersions
', []):
453 smp_config = version['smpConfig
']
454 title = smp_config['title
']
455 description = smp_config['summary
']
456 for item in smp_config['items
']:
458 if kind not in ('programme
', 'radioProgramme
'):
460 programme_id = item.get('vpid
')
461 duration = int_or_none(item.get('duration
'))
462 version_formats, version_subtitles = self._download_media_selector(programme_id)
463 types = version['types
']
464 for f in version_formats:
465 f['format_note
'] = ', '.join(types)
466 if any('AudioDescribed
' in x for x in types):
467 f['language_preference
'] = -10
468 formats += version_formats
469 for tag, subformats in (version_subtitles or {}).items():
470 subtitles.setdefault(tag, []).extend(subformats)
472 return programme_id, title, description, duration, formats, subtitles
473 except ExtractorError as ee:
474 if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
477 # fallback to legacy playlist
478 return self._process_legacy_playlist(playlist_id)
480 def _process_legacy_playlist_url(self, url, display_id):
481 playlist = self._download_legacy_playlist_url(url, display_id)
482 return self._extract_from_legacy_playlist(playlist, display_id)
484 def _process_legacy_playlist(self, playlist_id):
485 return self._process_legacy_playlist_url(
486 'http
://www
.bbc
.co
.uk
/iplayer
/playlist
/%s' % playlist_id, playlist_id)
488 def _download_legacy_playlist_url(self, url, playlist_id=None):
489 return self._download_xml(
490 url, playlist_id, 'Downloading legacy playlist XML
')
492 def _extract_from_legacy_playlist(self, playlist, playlist_id):
493 no_items = playlist.find('./{%s}noItems
' % self._EMP_PLAYLIST_NS)
494 if no_items is not None:
495 reason = no_items.get('reason
')
496 if reason == 'preAvailability
':
497 msg = 'Episode
%s is not yet available
' % playlist_id
498 elif reason == 'postAvailability
':
499 msg = 'Episode
%s is no longer available
' % playlist_id
500 elif reason == 'noMedia
':
501 msg = 'Episode
%s is not currently available
' % playlist_id
503 msg = 'Episode
%s is not available
: %s' % (playlist_id, reason)
504 raise ExtractorError(msg, expected=True)
506 for item in self._extract_items(playlist):
507 kind = item.get('kind
')
508 if kind not in ('programme
', 'radioProgramme
'):
510 title = playlist.find('./{%s}title
' % self._EMP_PLAYLIST_NS).text
511 description_el = playlist.find('./{%s}summary
' % self._EMP_PLAYLIST_NS)
512 description = description_el.text if description_el is not None else None
514 def get_programme_id(item):
515 def get_from_attributes(item):
516 for p in ('identifier
', 'group
'):
518 if value and re.match(r'^
[pb
][\da
-z
]{7}$
', value):
520 get_from_attributes(item)
521 mediator = item.find('./{%s}mediator
' % self._EMP_PLAYLIST_NS)
522 if mediator is not None:
523 return get_from_attributes(mediator)
525 programme_id = get_programme_id(item)
526 duration = int_or_none(item.get('duration
'))
529 formats, subtitles = self._download_media_selector(programme_id)
531 formats, subtitles = self._process_media_selector(item, playlist_id)
532 programme_id = playlist_id
534 return programme_id, title, description, duration, formats, subtitles
536 def _real_extract(self, url):
537 group_id = self._match_id(url)
539 webpage = self._download_webpage(url, group_id, 'Downloading video page
')
541 error = self._search_regex(
542 r'<div
\b[^
>]+\bclass
=["\'](?:smp|playout)__message delta["\'][^
>]*>\s
*([^
<]+?
)\s
*<',
543 webpage, 'error
', default=None)
545 raise ExtractorError(error, expected=True)
550 tviplayer = self._search_regex(
551 r'mediator\
.bind\
(({.+?}
)\s
*,\s
*document\
.getElementById
',
552 webpage, 'player
', default=None)
555 player = self._parse_json(tviplayer, group_id).get('player
', {})
556 duration = int_or_none(player.get('duration
'))
557 programme_id = player.get('vpid
')
560 programme_id = self._search_regex(
561 r'"vpid"\s
*:\s
*"(%s)"' % self._ID_REGEX, webpage, 'vpid
', fatal=False, default=None)
564 formats, subtitles = self._download_media_selector(programme_id)
565 title = self._og_search_title(webpage, default=None) or self._html_search_regex(
566 (r'<h2
[^
>]+id="parent-title"[^
>]*>(.+?
)</h2
>',
567 r'<div
[^
>]+class="info"[^
>]*>\s
*<h1
>(.+?
)</h1
>'), webpage, 'title
')
568 description = self._search_regex(
569 (r'<p
class="[^"]*medium
-description
[^
"]*">([^
<]+)</p
>',
570 r'<div
[^
>]+class="info_+synopsis"[^
>]*>([^
<]+)</div
>'),
571 webpage, 'description
', default=None)
573 description = self._html_search_meta('description
', webpage)
575 programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
577 self._sort_formats(formats)
582 'description
': description,
583 'thumbnail
': self._og_search_thumbnail(webpage, default=None),
584 'duration
': duration,
586 'subtitles
': subtitles,
590 class BBCIE(BBCCoUkIE):
593 _VALID_URL = r'https?
://(?
:www\
.)?bbc\
.(?
:com|co\
.uk
)/(?
:[^
/]+/)+(?P
<id>[^
/#?]+)'
597 'mobile-tablet-main',
601 # article with multiple videos embedded with data-playable containing vpids
602 'url': 'http://www.bbc.com/news/world-europe-32668511',
604 'id': 'world-europe-32668511',
605 'title': 'Russia stages massive WW2 parade',
606 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
610 # article with multiple videos embedded with data-playable (more videos)
611 'url': 'http://www.bbc.com/news/business-28299555',
613 'id': 'business-28299555',
614 'title': 'Farnborough Airshow: Video highlights',
615 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
620 # article with multiple videos embedded with `new SMP()`
622 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
624 'id': '3662a707-0af9-3149-963f-47bea720b460',
627 'playlist_count': 18,
629 # single video embedded with data-playable containing vpid
630 'url': 'http://www.bbc.com/news/world-europe-32041533',
634 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
635 'description': 'md5:2868290467291b37feda7863f7a83f54',
637 'timestamp': 1427219242,
638 'upload_date': '20150324',
642 'skip_download': True,
645 # article with single video embedded with data-playable containing XML playlist
646 # with direct video links as progressiveDownloadUrl (for now these are extracted)
647 # and playlist with f4m and m3u8 as streamingUrl
648 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
650 'id': '150615_telabyad_kentin_cogu',
652 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
653 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
654 'timestamp': 1434397334,
655 'upload_date': '20150615',
658 'skip_download': True,
661 # single video embedded with data-playable containing XML playlists (regional section)
662 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
664 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
666 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
667 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
668 'timestamp': 1434713142,
669 'upload_date': '20150619',
672 'skip_download': True,
675 # single video from video playlist embedded with vxp-playlist-data JSON
676 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
680 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
682 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
685 'skip_download': True,
688 # single video story with digitalData
689 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
693 'title': 'Sri Lanka’s spicy secret',
694 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
695 'timestamp': 1437674293,
696 'upload_date': '20150723',
700 'skip_download': True,
703 # single video story without digitalData
704 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
708 'title': 'Hyundai Santa Fe Sport: Rock star',
709 'description': 'md5:b042a26142c4154a6e472933cf20793d',
710 'timestamp': 1415867444,
711 'upload_date': '20141113',
715 'skip_download': True,
718 # single video embedded with Morph
719 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
723 'title': "Nigeria v Japan - Men's First Round",
724 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
726 'uploader': 'BBC Sport',
727 'uploader_id': 'bbc_sport',
731 'skip_download': True,
733 'skip': 'Georestricted to UK',
735 # single video with playlist.sxml URL in playlist param
736 'url': 'http://www.bbc.com/sport/0/football/33653409',
740 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
741 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
746 'skip_download': True,
749 # article with multiple videos embedded with playlist.sxml in playlist param
750 'url': 'http://www.bbc.com/sport/0/football/34475836',
753 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
754 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
758 # school report article with single video
759 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
762 'title': 'School which breaks down barriers in Jerusalem',
766 # single video with playlist URL from weather section
767 'url': 'http://www.bbc.com/weather/features/33601775',
768 'only_matching': True,
770 # custom redirection to www.bbc.com
771 # also, video with window.__INITIAL_DATA__
772 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
776 'title': "Pluto may have 'nitrogen glaciers'",
777 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
778 'thumbnail': r
're:https?://.+/.+\.jpg',
779 'timestamp': 1437785037,
780 'upload_date': '20150725',
783 # video with window.__INITIAL_DATA__ and value as JSON string
784 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
788 'title': 'Why France is making this woman a national hero',
789 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
790 'thumbnail': r
're:https?://.+/.+\.jpg',
791 'timestamp': 1638230731,
792 'upload_date': '20211130',
795 # single video article embedded with data-media-vpid
796 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
797 'only_matching': True,
800 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
804 'title': 'Things Not To Say to people that live on council estates',
805 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
807 'thumbnail': r
're:https?://.+/.+\.jpg',
810 # window.__PRELOADED_STATE__
811 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
815 'title': 'Prom 6: An American in Paris and Turangalila',
816 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
817 'uploader': 'Radio 3',
818 'uploader_id': 'bbc_radio_three',
821 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
825 'title': 'md5:2fabf12a726603193a2879a055f72514',
826 'description': 'Learn English words and phrases from this story',
828 'add_ie': [BBCCoUkIE
.ie_key()],
831 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
835 'title': 'How positive thinking is harming your happiness',
836 'alt_title': 'The downsides of positive thinking',
837 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
839 'thumbnail': r
're:https?://.+/p07c9dsr.jpg',
840 'upload_date': '20190604',
841 'categories': ['Psychology'],
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs accepted by any of the more specific BBC extractors listed below
    are rejected here; everything else is decided by the parent class.
    """
    # NOTE(review): this method takes `cls` and uses super(BBCIE, cls); the
    # decorator line is not visible in the extract under review - confirm
    # that @classmethod sits directly above this def.
    EXCLUDE_IE = (
        BBCCoUkIE,
        BBCCoUkArticleIE,
        BBCCoUkIPlayerEpisodesIE,
        BBCCoUkIPlayerGroupIE,
        BBCCoUkPlaylistIE,
    )
    for ie in EXCLUDE_IE:
        if ie.suitable(url):
            return False
    return super(BBCIE, cls).suitable(url)
851 def _extract_from_media_meta(self
, media_meta
, video_id
):
852 # Direct links to media in media metadata (e.g.
853 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
854 # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
855 source_files
= media_meta
.get('sourceFiles')
859 'format_id': format_id
,
860 'ext': f
.get('encoding'),
861 'tbr': float_or_none(f
.get('bitrate'), 1000),
862 'filesize': int_or_none(f
.get('filesize')),
863 } for format_id
, f
in source_files
.items() if f
.get('url')], []
865 programme_id
= media_meta
.get('externalId')
867 return self
._download
_media
_selector
(programme_id
)
869 # Process playlist.sxml as legacy playlist
870 href
= media_meta
.get('href')
872 playlist
= self
._download
_legacy
_playlist
_url
(href
)
873 _
, _
, _
, _
, formats
, subtitles
= self
._extract
_from
_legacy
_playlist
(playlist
, video_id
)
874 return formats
, subtitles
878 def _extract_from_playlist_sxml(self
, url
, playlist_id
, timestamp
):
879 programme_id
, title
, description
, duration
, formats
, subtitles
= \
880 self
._process
_legacy
_playlist
_url
(url
, playlist_id
)
881 self
._sort
_formats
(formats
)
885 'description': description
,
886 'duration': duration
,
887 'timestamp': timestamp
,
889 'subtitles': subtitles
,
892 def _real_extract(self
, url
):
893 playlist_id
= self
._match
_id
(url
)
895 webpage
= self
._download
_webpage
(url
, playlist_id
)
897 json_ld_info
= self
._search
_json
_ld
(webpage
, playlist_id
, default
={})
898 timestamp
= json_ld_info
.get('timestamp')
900 playlist_title
= json_ld_info
.get('title')
901 if not playlist_title
:
902 playlist_title
= (self
._og
_search
_title
(webpage
, default
=None)
903 or self
._html
_extract
_title
(webpage
, 'playlist title', default
=None))
905 playlist_title
= re
.sub(r
'(.+)\s*-\s*BBC.*?$', r
'\1', playlist_title
).strip()
907 playlist_description
= json_ld_info
.get(
908 'description') or self
._og
_search
_description
(webpage
, default
=None)
911 timestamp
= parse_iso8601(self
._search
_regex
(
912 [r
'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
913 r
'itemprop="datePublished"[^>]+datetime="([^"]+)"',
914 r
'"datePublished":\s*"([^"]+)'],
915 webpage
, 'date', default
=None))
919 # article with multiple videos embedded with playlist.sxml (e.g.
920 # http://www.bbc.com/sport/0/football/34475836)
921 playlists
= re
.findall(r
'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage
)
922 playlists
.extend(re
.findall(r
'data-media-id="([^"]+/playlist\.sxml)"', webpage
))
925 self
._extract
_from
_playlist
_sxml
(playlist_url
, playlist_id
, timestamp
)
926 for playlist_url
in playlists
]
928 # news article with multiple videos embedded with data-playable
929 data_playables
= re
.findall(r
'data-playable=(["\'])({.+?}
)\
1', webpage)
931 for _, data_playable_json in data_playables:
932 data_playable = self._parse_json(
933 unescapeHTML(data_playable_json), playlist_id, fatal=False)
934 if not data_playable:
936 settings = data_playable.get('settings
', {})
938 # data-playable with video vpid in settings.playlistObject.items (e.g.
939 # http://www.bbc.com/news/world-us-canada-34473351)
940 playlist_object = settings.get('playlistObject
', {})
942 items = playlist_object.get('items
')
943 if items and isinstance(items, list):
944 title = playlist_object['title
']
945 description = playlist_object.get('summary
')
946 duration = int_or_none(items[0].get('duration
'))
947 programme_id = items[0].get('vpid
')
948 formats, subtitles = self._download_media_selector(programme_id)
949 self._sort_formats(formats)
953 'description
': description,
954 'timestamp
': timestamp,
955 'duration
': duration,
957 'subtitles
': subtitles,
960 # data-playable without vpid but with a playlist.sxml URLs
961 # in otherSettings.playlist (e.g.
962 # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
963 playlist = data_playable.get('otherSettings
', {}).get('playlist', {})
966 for key in ('streaming
', 'progressiveDownload
'):
967 playlist_url = playlist.get('%sUrl
' % key)
971 info = self._extract_from_playlist_sxml(
972 playlist_url, playlist_id, timestamp)
976 entry['title
'] = info['title
']
977 entry['formats
'].extend(info['formats
'])
978 except ExtractorError as e:
979 # Some playlist URL may fail with 500, at the same time
980 # the other one may work fine (e.g.
981 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
982 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
986 self._sort_formats(entry['formats
'])
987 entries.append(entry)
990 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
992 # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
993 group_id = self._search_regex(
994 r'<div
[^
>]+\bclass
=["\']video["\'][^
>]+\bdata
-pid
=["\'](%s)' % self._ID_REGEX,
995 webpage, 'group id', default=None)
997 return self.url_result(
998 'https://www.bbc.co.uk/programmes/%s' % group_id,
999 ie=BBCCoUkIE.ie_key())
1001 # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
1002 programme_id = self._search_regex(
1003 [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
1004 r'<param[^>]+name="externalIdentifier
"[^>]+value="(%s)"' % self._ID_REGEX,
1005 r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
1006 webpage, 'vpid', default=None)
1009 formats, subtitles = self._download_media_selector(programme_id)
1010 self._sort_formats(formats)
1011 # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
1012 digital_data = self._parse_json(
1014 r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
1015 programme_id, fatal=False)
1016 page_info = digital_data.get('page', {}).get('pageInfo', {})
1017 title = page_info.get('pageName') or self._og_search_title(webpage)
1018 description = page_info.get('description') or self._og_search_description(webpage)
1019 timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
1023 'description': description,
1024 'timestamp': timestamp,
1026 'subtitles': subtitles,
1029 # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
1030 initial_data = self._parse_json(self._html_search_regex(
1031 r'<script[^>]+id=(["\'])initial
-data\
1[^
>]+data
-json
=(["\'])(?P<json>(?:(?!\2).)+)',
1032 webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
1034 init_data = try_get(
1035 initial_data, lambda x: x['initData']['items'][0], dict) or {}
1036 smp_data = init_data.get('smpData') or {}
1037 clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
1038 version_id = clip_data.get('versionID')
1040 title = smp_data['title']
1041 formats, subtitles = self._download_media_selector(version_id)
1042 self._sort_formats(formats)
1043 image_url = smp_data.get('holdingImageURL')
1044 display_date = init_data.get('displayDate')
1045 topic_title = init_data.get('topicTitle')
1051 'alt_title': init_data.get('shortTitle'),
1052 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
1053 'description': smp_data.get('summary') or init_data.get('shortSummary'),
1054 'upload_date': display_date.replace('-', '') if display_date else None,
1055 'subtitles': subtitles,
1056 'duration': int_or_none(clip_data.get('duration')),
1057 'categories': [topic_title] if topic_title else None,
1060 # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
        # Several setPayload calls may be present, but the video
        # always seems to be related to the first one
1063 morph_payload = self._parse_json(
1065 r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
1066 webpage, 'morph payload', default='{}'),
1067 playlist_id, fatal=False)
1069 components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
1070 for component in components:
1071 if not isinstance(component, dict):
1073 lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
1076 identifiers = lead_media.get('identifiers')
1077 if not identifiers or not isinstance(identifiers, dict):
1079 programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
1080 if not programme_id:
1082 title = lead_media.get('title') or self._og_search_title(webpage)
1083 formats, subtitles = self._download_media_selector(programme_id)
1084 self._sort_formats(formats)
1085 description = lead_media.get('summary')
1086 uploader = lead_media.get('masterBrand')
1087 uploader_id = lead_media.get('mid')
1089 duration_d = lead_media.get('duration')
1090 if isinstance(duration_d, dict):
1091 duration = parse_duration(dict_get(
1092 duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
1096 'description': description,
1097 'duration': duration,
1098 'uploader': uploader,
1099 'uploader_id': uploader_id,
1101 'subtitles': subtitles,
1104 preload_state = self._parse_json(self._search_regex(
1105 r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
1106 'preload state', default='{}'), playlist_id, fatal=False)
1108 current_programme = preload_state.get('programmes', {}).get('current') or {}
1109 programme_id = current_programme.get('id')
1110 if current_programme and programme_id and current_programme.get('type') == 'playable_item':
1111 title = current_programme.get('titles', {}).get('tertiary') or playlist_title
1112 formats, subtitles = self._download_media_selector(programme_id)
1113 self._sort_formats(formats)
1114 synopses = current_programme.get('synopses') or {}
1115 network = current_programme.get('network') or {}
1116 duration = int_or_none(
1117 current_programme.get('duration', {}).get('value'))
1119 image_url = current_programme.get('image_url')
1121 thumbnail = image_url.replace('{recipe}', 'raw')
1125 'description': dict_get(synopses, ('long', 'medium', 'short')),
1126 'thumbnail': thumbnail,
1127 'duration': duration,
1128 'uploader': network.get('short_title'),
1129 'uploader_id': network.get('id'),
1131 'subtitles': subtitles,
1134 bbc3_config = self._parse_json(
1136 r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
1137 'bbcthree config', default='{}'),
1138 playlist_id, transform_source=js_to_json, fatal=False) or {}
1139 payload = bbc3_config.get('payload') or {}
1141 clip = payload.get('currentClip') or {}
1142 clip_vpid = clip.get('vpid')
1143 clip_title = clip.get('title')
1144 if clip_vpid and clip_title:
1145 formats, subtitles = self._download_media_selector(clip_vpid)
1146 self._sort_formats(formats)
1149 'title': clip_title,
1150 'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
1151 'description': clip.get('description'),
1152 'duration': parse_duration(clip.get('duration')),
1154 'subtitles': subtitles,
1156 bbc3_playlist = try_get(
1157 payload, lambda x: x['content']['bbcMedia']['playlist'],
1160 playlist_title = bbc3_playlist.get('title') or playlist_title
1161 thumbnail = bbc3_playlist.get('holdingImageURL')
1163 for bbc3_item in bbc3_playlist['items']:
1164 programme_id = bbc3_item.get('versionID')
1165 if not programme_id:
1167 formats, subtitles = self._download_media_selector(programme_id)
1168 self._sort_formats(formats)
1171 'title': playlist_title,
1172 'thumbnail': thumbnail,
1173 'timestamp': timestamp,
1175 'subtitles': subtitles,
1177 return self.playlist_result(
1178 entries, playlist_id, playlist_title, playlist_description)
1180 initial_data = self._search_regex(
1181 r'window\.__INITIAL_DATA__\s*=\s*("{.+?}
")\s*;', webpage,
1182 'quoted preload state', default=None)
1183 if initial_data is None:
1184 initial_data = self._search_regex(
1185 r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
1186 'preload state', default={})
1188 initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
1189 initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
1191 def parse_media(media):
1194 for item in (try_get(media, lambda x: x['media']['items'], list) or []):
1195 item_id = item.get('id')
1196 item_title = item.get('title')
1197 if not (item_id and item_title):
1199 formats, subtitles = self._download_media_selector(item_id)
1200 self._sort_formats(formats)
1202 blocks = try_get(media, lambda x: x['summary']['blocks'], list)
1205 for block in blocks:
1206 text = try_get(block, lambda x: x['model']['text'], compat_str)
1208 summary.append(text)
1210 item_desc = '\n\n'.join(summary)
1212 for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
1213 if try_get(meta, lambda x: x['label']) == 'Published':
1214 item_time = unified_timestamp(meta.get('timestamp'))
1218 'title': item_title,
1219 'thumbnail': item.get('holdingImageUrl'),
1221 'subtitles': subtitles,
1222 'timestamp': item_time,
1223 'description': strip_or_none(item_desc),
1225 for resp in (initial_data.get('data') or {}).values():
1226 name = resp.get('name')
1227 if name == 'media-experience':
1228 parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
1229 elif name == 'article':
1230 for block in (try_get(resp,
1231 (lambda x: x['data']['blocks'],
1232 lambda x: x['data']['content']['model']['blocks'],),
1234 if block.get('type') != 'media':
1236 parse_media(block.get('model'))
1237 return self.playlist_result(
1238 entries, playlist_id, playlist_title, playlist_description)
1240 def extract_all(pattern):
1241 return list(filter(None, map(
1242 lambda s: self._parse_json(s, playlist_id, fatal=False),
1243 re.findall(pattern, webpage))))
1245 # Multiple video article (e.g.
1246 # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
1247 EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?
' % self._ID_REGEX
1249 for match in extract_all(r'new\s
+SMP\
(({.+?}
)\
)'):
1250 embed_url = match.get('playerSettings
', {}).get('externalEmbedUrl
')
1251 if embed_url and re.match(EMBED_URL, embed_url):
1252 entries.append(embed_url)
1253 entries.extend(re.findall(
1254 r'setPlaylist\
("(%s)"\
)' % EMBED_URL, webpage))
1256 return self.playlist_result(
1257 [self.url_result(entry_, 'BBCCoUk
') for entry_ in entries],
1258 playlist_id, playlist_title, playlist_description)
1260 # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
1261 medias = extract_all(r"data-media-meta='({[^']+}
)'")
1264 # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
1265 media_asset = self._search_regex(
1266 r'mediaAssetPage\
.init\
(\s
*({.+?}
), "/',
1267 webpage, 'media asset', default=None)
1269 media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
1271 for video in media_asset_page.get('videos', {}).values():
1272 medias.extend(video.values())
1275 # Multiple video playlist with single `now playing` entry (e.g.
1276 # http://www.bbc.com/news/video_and_audio/must_see/33767813)
1277 vxp_playlist = self._parse_json(
1279 r'<script[^>]+class="vxp
-playlist
-data
"[^>]+type="application
/json
"[^>]*>([^<]+)</script>',
1280 webpage, 'playlist data'),
1282 playlist_medias = []
1283 for item in vxp_playlist:
1284 media = item.get('media')
1287 playlist_medias.append(media)
1288 # Download a single video if a media item's asset id matches the video id from the URL
1289 if item.get('advert', {}).get('assetId') == playlist_id:
1292 # Fallback to the whole playlist
1294 medias = playlist_medias
1297 for num, media_meta in enumerate(medias, start=1):
1298 formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
1299 if not formats and not self.get_param('ignore_no_formats'):
1301 self._sort_formats(formats)
1303 video_id = media_meta.get('externalId')
1305 video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
1307 title = media_meta.get('caption')
1309 title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
1311 duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
1314 for image in media_meta.get('images', {}).values():
1315 images.extend(image.values())
1316 if 'image' in media_meta:
1317 images.append(media_meta['image'])
1320 'url': image.get('href'),
1321 'width': int_or_none(image.get('width')),
1322 'height': int_or_none(image.get('height')),
1323 } for image in images]
1328 'thumbnails': thumbnails,
1329 'duration': duration,
1330 'timestamp': timestamp,
1332 'subtitles': subtitles,
1335 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
class BBCCoUkArticleIE(InfoExtractor):
    """Playlist extractor for bbc.co.uk ``/programmes/articles/<id>`` pages.

    Every clip ``<div>`` found in the article markup contributes one
    playlist entry; each entry is a plain URL result.
    """

    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of the clip URLs embedded in the article at *url*.

        The playlist id is the article id matched by ``_VALID_URL``; the title
        and description are taken from the page's Open Graph metadata.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # The description is optional metadata: the lookup may yield None when
        # the page has no og:description, so strip it with strip_or_none()
        # rather than calling .strip() on a possible None.
        description = strip_or_none(self._og_search_description(webpage))

        entries = [
            self.url_result(programme_url)
            for programme_url in re.findall(
                r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    """Shared paging logic for bbc.co.uk programme listing pages.

    Subclasses provide ``_VIDEO_ID_TEMPLATE``, ``_URL_TEMPLATE`` and
    ``_extract_title_and_description``.
    """

    def _entries(self, webpage, url, playlist_id):
        """Yield one URL result per programme id found in the listing.

        Starts with the already downloaded *webpage*. When *url* has no
        ``page`` query parameter, the "next" pagination links are followed
        and every subsequent page is scanned as well.
        """
        url_query = compat_urlparse.urlparse(url).query
        explicit_page = 'page' in compat_urlparse.parse_qs(url_query)

        # The first page is already in hand, so downloads start at page 2.
        for next_page_num in itertools.count(2):
            found_ids = re.findall(
                self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)
            for video_id in found_ids:
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())

            # An explicitly requested page is the only one to extract.
            if explicit_page:
                return

            next_page_url = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page_url:
                return

            webpage = self._download_webpage(
                compat_urlparse.urljoin(url, next_page_url), playlist_id,
                'Downloading page %d' % next_page_num, next_page_num)

    def _real_extract(self, url):
        """Download the listing page and return it as a playlist result."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        title, description = self._extract_title_and_description(webpage)
        entries = self._entries(webpage, url, playlist_id)
        return self.playlist_result(entries, playlist_id, title, description)
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    """Common base for the iPlayer listing extractors.

    Subclasses provide ``_PAGE_SIZE``, ``_DESCRIPTION_KEY`` and the hooks
    ``_call_api``, ``_get_elements``, ``_get_episode``, ``_get_episode_image``,
    ``_get_episode_field``, ``_get_playlist_data`` and ``_get_playlist_title``.
    """

    # The doubled %% leaves a single %s placeholder for the listing kind,
    # which each subclass fills in when building its own _VALID_URL.
    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Look up ``episode[key][default_key]`` through ``try_get``."""
        def nested_value(container):
            return container[key][default_key]

        return try_get(episode, nested_value)

    def _get_description(self, data):
        """Pick the 'large', 'medium' or 'small' synopsis from *data*."""
        return dict_get(
            data.get(self._DESCRIPTION_KEY) or {},
            ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield URL-type entries for one zero-based *page* of the listing.

        The API is called with ``page + 1``. Elements without an episode id
        are skipped.
        """
        api_response = self._call_api(
            programme_id, per_page, page + 1, series_id)
        for element in self._get_elements(api_response):
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                continue
            image = self._get_episode_image(episode)
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': image.replace('{recipe}', 'raw') if image else None,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the playlist for *url*.

        With a ``page`` query parameter only that page is fetched, 36 items
        per page; otherwise all pages are exposed through a lazily evaluated
        ``OnDemandPagedList``.
        """
        pid = self._match_id(url)
        qs = parse_qs(url)
        series_id = qs.get('seriesId', [None])[0]
        page = qs.get('page', [None])[0]

        if page:
            # The query parameter is one-based; _fetch_page is zero-based.
            entries = self._fetch_page(pid, 36, series_id, int(page) - 1)
        else:
            page_size = self._PAGE_SIZE
            entries = OnDemandPagedList(
                functools.partial(self._fetch_page, pid, page_size, series_id),
                self._PAGE_SIZE)

        # A one-item request is enough to obtain the playlist metadata.
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        playlist_title = self._get_playlist_title(playlist_data)
        return self.playlist_result(
            entries, pid, playlist_title,
            self._get_description(playlist_data))
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    """Playlist extractor for ``/iplayer/episodes/<pid>`` URLs.

    Listing data is requested from the graph.ibl.api.bbc.co.uk endpoint.
    """

    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the episode's ``image`` -> ``default`` value."""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the ``default`` variant of the given episode *field*."""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the result list stored under ``entities`` -> ``results``."""
        entities = data['entities']
        return entities['results']

    @staticmethod
    def _get_episode(element):
        """Return the element's ``episode`` mapping, or an empty dict."""
        return element.get('episode') or {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST the listing query and return its ``data`` -> ``programme`` part.

        ``series_id``, when given, is sent as the ``sliceId`` variable.
        """
        variables = {'id': pid, 'page': page, 'perPage': per_page}
        if series_id:
            variables['sliceId'] = series_id
        request_body = json.dumps({
            'id': '5692d93d5aac8d796a0305e895e61551',
            'variables': variables,
        }).encode('utf-8')
        response = self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid,
            headers={'Content-Type': 'application/json'},
            data=request_body)
        return response['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """Return *data* unchanged; it already is the playlist metadata."""
        return data

    def _get_playlist_title(self, data):
        """Return the ``title`` -> ``default`` value of *data*."""
        return self._get_default(data, 'title')
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    """Playlist extractor for ``/iplayer/group/<pid>`` URLs.

    Listing data is requested from the ibl.api.bbc.co.uk groups endpoint.
    """

    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the episode's ``images`` -> ``standard`` value."""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Return the episode's *field* value as stored (no nesting here)."""
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        """Return the list stored under ``elements``."""
        return data['elements']

    @staticmethod
    def _get_episode(element):
        """Return *element* unchanged; each element already is the episode."""
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of the group and return its ``group_episodes`` part.

        ``series_id`` is accepted for signature parity with the base class
        hook and is not used by this request.
        """
        api_url = 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid
        response = self._download_json(
            api_url, pid, query={
                'page': page,
                'per_page': per_page,
            })
        return response['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Return the ``group`` mapping holding the playlist metadata."""
        return data['group']

    def _get_playlist_title(self, data):
        """Return the plain ``title`` value of *data*."""
        return data.get('title')
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    """Playlist extractor for programme episodes/broadcasts/clips listings."""

    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return ``(title, description)`` from the page's Open Graph tags.

        The title lookup is explicitly non-fatal.
        """
        og_title = self._og_search_title(webpage, fatal=False)
        og_description = self._og_search_description(webpage)
        return og_title, og_description