6 import xml
.etree
.ElementTree
8 from .common
import InfoExtractor
9 from ..compat
import compat_HTTPError
, compat_str
, compat_urlparse
32 class BBCCoUkIE(InfoExtractor
):
34 IE_DESC
= 'BBC iPlayer'
35 _ID_REGEX
= r
'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
38 (?:www\.)?bbc\.co\.uk/
40 programmes/(?!articles/)|
41 iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
42 music/(?:clips|audiovideo/popular)[/#]|
45 events/[^/]+/play/[^/]+/
47 (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
49 _EMBED_REGEX
= [r
'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
51 _LOGIN_URL
= 'https://account.bbc.com/signin'
52 _NETRC_MACHINE
= 'bbc'
54 _MEDIA_SELECTOR_URL_TEMPL
= 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
# Provides HQ HLS streams with even better quality than the pc mediaset, but fails
# with geolocation in some cases even when the programme is not geo restricted at all
# (e.g. http://www.bbc.co.uk/programmes/b06bp7lf). May also fail with selectionunavailable.
63 _EMP_PLAYLIST_NS
= 'http://bbc.co.uk/2008/emp/playlist'
67 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
71 'title': 'Kaleidoscope, Leonard Cohen',
72 'description': 'The Canadian poet and songwriter reflects on his musical career.',
76 'skip_download': True,
80 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
84 'title': 'The Man in Black: Series 3: The Printed Name',
85 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
90 'skip_download': True,
92 'skip': 'Episode is no longer available on BBC iPlayer Radio',
95 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
99 'title': 'The Voice UK: Series 3: Blind Auditions 5',
100 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
105 'skip_download': True,
107 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
110 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
114 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
115 'description': '2. Invasion',
120 'skip_download': True,
122 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
124 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
128 'title': 'Pete Tong, The Essential New Tune Special',
129 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
134 'skip_download': True,
136 'skip': 'Episode is no longer available on BBC iPlayer Radio',
138 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
143 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
144 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
149 'skip_download': True,
152 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
157 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
158 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
163 'skip_download': True,
166 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
170 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
171 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
176 'skip_download': True,
178 'skip': 'geolocation',
180 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
184 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
185 'title': 'Royal Academy Summer Exhibition',
190 'skip_download': True,
192 'skip': 'geolocation',
194 # iptv-all mediaset fails with geolocation however there is no geo restriction
195 # for this programme at all
196 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
200 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
201 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
205 'skip_download': True,
207 'skip': 'Now it\'s really geo-restricted',
209 # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
210 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
214 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
215 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
219 'skip_download': True,
222 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
227 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
228 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
233 'skip_download': True,
236 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
237 'only_matching': True,
239 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
240 'only_matching': True,
242 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
243 'only_matching': True,
245 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
246 'only_matching': True,
248 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
249 'only_matching': True,
251 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
252 'only_matching': True,
254 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
255 'only_matching': True,
257 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
258 'only_matching': True,
261 def _perform_login(self
, username
, password
):
262 login_page
= self
._download
_webpage
(
263 self
._LOGIN
_URL
, None, 'Downloading signin page')
265 login_form
= self
._hidden
_inputs
(login_page
)
268 'username': username
,
269 'password': password
,
272 post_url
= urljoin(self
._LOGIN
_URL
, self
._search
_regex
(
273 r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', login_page,
274 'post url
', default=self._LOGIN_URL, group='url
'))
276 response, urlh = self._download_webpage_handle(
277 post_url, None, 'Logging
in', data=urlencode_postdata(login_form),
278 headers={'Referer': self._LOGIN_URL})
280 if self._LOGIN_URL in urlh.geturl():
281 error = clean_html(get_element_by_class('form
-message
', response))
283 raise ExtractorError(
284 'Unable to login
: %s' % error, expected=True)
285 raise ExtractorError('Unable to log
in')
287 class MediaSelectionError(Exception):
288 def __init__(self, id):
def _extract_asx_playlist(self, connection, programme_id):
    """Return the URLs referenced by an ASX playlist.

    The XML document at ``connection.get('href')`` is fetched through
    ``self._download_xml`` and the ``href`` attribute of every
    ``./Entry/ref`` element is collected in document order. A ``ref``
    element without an ``href`` attribute contributes ``None``.

    :param connection: mapping describing the connection; only its
        ``'href'`` key is read here.
    :param programme_id: identifier forwarded to ``self._download_xml``.
    :returns: list of ``href`` attribute values.
    """
    asx = self._download_xml(
        connection.get('href'), programme_id, 'Downloading ASX playlist')
    return [ref.get('href') for ref in asx.findall('./Entry/ref')]
def _extract_items(self, playlist):
    """Return the direct ``item`` children of a playlist XML element.

    The ``item`` tag is looked up in the XML namespace stored in
    ``self._EMP_PLAYLIST_NS``; only immediate children of ``playlist``
    are matched.

    :param playlist: XML element offering ``findall``.
    :returns: list of matching child elements (empty when none match).
    """
    return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
298 def _extract_medias(self, media_selection):
299 error = media_selection.get('result
')
301 raise BBCCoUkIE.MediaSelectionError(error)
302 return media_selection.get('media
') or []
def _extract_connections(self, media):
    """Return the ``'connection'`` entry of ``media``, or an empty list.

    A missing key and any falsy value (``None``, ``[]``, ...) both yield
    ``[]``, so callers can iterate over the result unconditionally.

    :param media: mapping offering ``get``.
    """
    return media.get('connection') or []
307 def _get_subtitles(self, media, programme_id):
309 for connection in self._extract_connections(media):
310 cc_url = url_or_none(connection.get('href
'))
313 captions = self._download_xml(
314 cc_url, programme_id, 'Downloading captions
', fatal=False)
315 if not isinstance(captions, xml.etree.ElementTree.Element):
319 'url
': connection.get('href
'),
326 def _raise_extractor_error(self, media_selection_error):
327 raise ExtractorError(
328 '%s returned error
: %s' % (self.IE_NAME, media_selection_error.id),
331 def _download_media_selector(self, programme_id):
332 last_exception = None
333 for media_set in self._MEDIA_SETS:
335 return self._download_media_selector_url(
336 self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
337 except BBCCoUkIE.MediaSelectionError as e:
338 if e.id in ('notukerror
', 'geolocation
', 'selectionunavailable
'):
341 self._raise_extractor_error(e)
342 self._raise_extractor_error(last_exception)
def _download_media_selector_url(self, url, programme_id=None):
    """Download a media selection JSON document and process it.

    The document is fetched with ``self._download_json``; HTTP 403 and 404
    are passed as ``expected_status`` (presumably so that the body of such
    responses is still parsed instead of aborting -- confirm against the
    helper). The parsed document is then handed to
    ``self._process_media_selector``.

    :param url: media selector URL to download.
    :param programme_id: identifier forwarded to both helpers.
    :returns: whatever ``self._process_media_selector`` returns.
    """
    media_selection = self._download_json(
        url, programme_id, 'Downloading media selection JSON',
        expected_status=(403, 404))
    return self._process_media_selector(media_selection, programme_id)
350 def _process_media_selector(self, media_selection, programme_id):
355 for media in self._extract_medias(media_selection):
356 kind = media.get('kind
')
357 if kind in ('video
', 'audio
'):
358 bitrate = int_or_none(media.get('bitrate
'))
359 encoding = media.get('encoding
')
360 width = int_or_none(media.get('width
'))
361 height = int_or_none(media.get('height
'))
362 file_size = int_or_none(media.get('media_file_size
'))
363 for connection in self._extract_connections(media):
364 href = connection.get('href
')
369 conn_kind = connection.get('kind
')
370 protocol = connection.get('protocol
')
371 supplier = connection.get('supplier
')
372 transfer_format = connection.get('transferFormat
')
373 format_id = supplier or conn_kind or protocol
375 if supplier == 'asx
':
376 for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
379 'format_id
': 'ref
%s_%s' % (i, format_id),
381 elif transfer_format == 'dash
':
382 formats.extend(self._extract_mpd_formats(
383 href, programme_id, mpd_id=format_id, fatal=False))
384 elif transfer_format == 'hls
':
385 # TODO: let expected_status be passed into _extract_xxx_formats() instead
387 fmts = self._extract_m3u8_formats(
388 href, programme_id, ext='mp4
', entry_protocol='m3u8_native
',
389 m3u8_id=format_id, fatal=False)
390 except ExtractorError as e:
391 if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
392 and e.exc_info[1].code in (403, 404)):
396 elif transfer_format == 'hds
':
397 formats.extend(self._extract_f4m_formats(
398 href, programme_id, f4m_id=format_id, fatal=False))
400 if not supplier and bitrate:
401 format_id += '-%d' % bitrate
403 'format_id
': format_id,
404 'filesize
': file_size,
419 if protocol in ('http
', 'https
'):
424 elif protocol == 'rtmp
':
425 application = connection.get('application
', 'ondemand
')
426 auth_string = connection.get('authString
')
427 identifier = connection.get('identifier
')
428 server = connection.get('server
')
430 'url
': '%s://%s/%s?
%s' % (protocol, server, application, auth_string),
431 'play_path
': identifier,
432 'app
': '%s?
%s' % (application, auth_string),
433 'page_url
': 'http
://www
.bbc
.co
.uk
',
434 'player_url
': 'http
://www
.bbc
.co
.uk
/emp
/releases
/iplayer
/revisions
/617463_618125_4/617463_618125_4_emp
.swf
',
441 elif kind == 'captions
':
442 subtitles = self.extract_subtitles(media, programme_id)
443 return formats, subtitles
445 def _download_playlist(self, playlist_id):
447 playlist = self._download_json(
448 'http
://www
.bbc
.co
.uk
/programmes
/%s/playlist
.json
' % playlist_id,
449 playlist_id, 'Downloading playlist JSON
')
453 for version in playlist.get('allAvailableVersions
', []):
454 smp_config = version['smpConfig
']
455 title = smp_config['title
']
456 description = smp_config['summary
']
457 for item in smp_config['items
']:
459 if kind not in ('programme
', 'radioProgramme
'):
461 programme_id = item.get('vpid
')
462 duration = int_or_none(item.get('duration
'))
463 version_formats, version_subtitles = self._download_media_selector(programme_id)
464 types = version['types
']
465 for f in version_formats:
466 f['format_note
'] = ', '.join(types)
467 if any('AudioDescribed
' in x for x in types):
468 f['language_preference
'] = -10
469 formats += version_formats
470 for tag, subformats in (version_subtitles or {}).items():
471 subtitles.setdefault(tag, []).extend(subformats)
473 return programme_id, title, description, duration, formats, subtitles
474 except ExtractorError as ee:
475 if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
478 # fallback to legacy playlist
479 return self._process_legacy_playlist(playlist_id)
def _process_legacy_playlist_url(self, url, display_id):
    """Download a legacy playlist from ``url`` and extract its contents.

    Chains ``self._download_legacy_playlist_url`` and
    ``self._extract_from_legacy_playlist``.

    :param url: location of the legacy playlist.
    :param display_id: identifier forwarded to both helpers.
    :returns: whatever ``self._extract_from_legacy_playlist`` returns.
    """
    playlist = self._download_legacy_playlist_url(url, display_id)
    return self._extract_from_legacy_playlist(playlist, display_id)
def _process_legacy_playlist(self, playlist_id):
    """Process the legacy iPlayer playlist belonging to ``playlist_id``.

    Builds the ``http://www.bbc.co.uk/iplayer/playlist/<id>`` URL and
    delegates to ``self._process_legacy_playlist_url``.

    :param playlist_id: identifier interpolated into the playlist URL and
        also passed on as the display id.
    :returns: whatever ``self._process_legacy_playlist_url`` returns.
    """
    return self._process_legacy_playlist_url(
        'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
def _download_legacy_playlist_url(self, url, playlist_id=None):
    """Fetch the legacy playlist XML document at ``url``.

    :param url: location of the playlist.
    :param playlist_id: identifier forwarded to ``self._download_xml``;
        may be ``None``.
    :returns: whatever ``self._download_xml`` returns.
    """
    return self._download_xml(
        url, playlist_id, 'Downloading legacy playlist XML')
493 def _extract_from_legacy_playlist(self, playlist, playlist_id):
494 no_items = playlist.find('./{%s}noItems
' % self._EMP_PLAYLIST_NS)
495 if no_items is not None:
496 reason = no_items.get('reason
')
497 if reason == 'preAvailability
':
498 msg = 'Episode
%s is not yet available
' % playlist_id
499 elif reason == 'postAvailability
':
500 msg = 'Episode
%s is no longer available
' % playlist_id
501 elif reason == 'noMedia
':
502 msg = 'Episode
%s is not currently available
' % playlist_id
504 msg = 'Episode
%s is not available
: %s' % (playlist_id, reason)
505 raise ExtractorError(msg, expected=True)
507 for item in self._extract_items(playlist):
508 kind = item.get('kind
')
509 if kind not in ('programme
', 'radioProgramme
'):
511 title = playlist.find('./{%s}title
' % self._EMP_PLAYLIST_NS).text
512 description_el = playlist.find('./{%s}summary
' % self._EMP_PLAYLIST_NS)
513 description = description_el.text if description_el is not None else None
515 def get_programme_id(item):
516 def get_from_attributes(item):
517 for p in ('identifier
', 'group
'):
519 if value and re.match(r'^
[pb
][\da
-z
]{7}$
', value):
521 get_from_attributes(item)
522 mediator = item.find('./{%s}mediator
' % self._EMP_PLAYLIST_NS)
523 if mediator is not None:
524 return get_from_attributes(mediator)
526 programme_id = get_programme_id(item)
527 duration = int_or_none(item.get('duration
'))
530 formats, subtitles = self._download_media_selector(programme_id)
532 formats, subtitles = self._process_media_selector(item, playlist_id)
533 programme_id = playlist_id
535 return programme_id, title, description, duration, formats, subtitles
537 def _real_extract(self, url):
538 group_id = self._match_id(url)
540 webpage = self._download_webpage(url, group_id, 'Downloading video page
')
542 error = self._search_regex(
543 r'<div
\b[^
>]+\bclass
=["\'](?:smp|playout)__message delta["\'][^
>]*>\s
*([^
<]+?
)\s
*<',
544 webpage, 'error
', default=None)
546 raise ExtractorError(error, expected=True)
551 tviplayer = self._search_regex(
552 r'mediator\
.bind\
(({.+?}
)\s
*,\s
*document\
.getElementById
',
553 webpage, 'player
', default=None)
556 player = self._parse_json(tviplayer, group_id).get('player
', {})
557 duration = int_or_none(player.get('duration
'))
558 programme_id = player.get('vpid
')
561 programme_id = self._search_regex(
562 r'"vpid"\s
*:\s
*"(%s)"' % self._ID_REGEX, webpage, 'vpid
', fatal=False, default=None)
565 formats, subtitles = self._download_media_selector(programme_id)
566 title = self._og_search_title(webpage, default=None) or self._html_search_regex(
567 (r'<h2
[^
>]+id="parent-title"[^
>]*>(.+?
)</h2
>',
568 r'<div
[^
>]+class="info"[^
>]*>\s
*<h1
>(.+?
)</h1
>'), webpage, 'title
')
569 description = self._search_regex(
570 (r'<p
class="[^"]*medium
-description
[^
"]*">([^
<]+)</p
>',
571 r'<div
[^
>]+class="info_+synopsis"[^
>]*>([^
<]+)</div
>'),
572 webpage, 'description
', default=None)
574 description = self._html_search_meta('description
', webpage)
576 programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
578 self._sort_formats(formats)
583 'description
': description,
584 'thumbnail
': self._og_search_thumbnail(webpage, default=None),
585 'duration
': duration,
587 'subtitles
': subtitles,
591 class BBCIE(BBCCoUkIE):
594 _VALID_URL = r'https?
://(?
:www\
.)?bbc\
.(?
:com|co\
.uk
)/(?
:[^
/]+/)+(?P
<id>[^
/#?]+)'
598 'mobile-tablet-main',
602 # article with multiple videos embedded with data-playable containing vpids
603 'url': 'http://www.bbc.com/news/world-europe-32668511',
605 'id': 'world-europe-32668511',
606 'title': 'Russia stages massive WW2 parade',
607 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
611 # article with multiple videos embedded with data-playable (more videos)
612 'url': 'http://www.bbc.com/news/business-28299555',
614 'id': 'business-28299555',
615 'title': 'Farnborough Airshow: Video highlights',
616 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
621 # article with multiple videos embedded with `new SMP()`
623 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
625 'id': '3662a707-0af9-3149-963f-47bea720b460',
628 'playlist_count': 18,
630 # single video embedded with data-playable containing vpid
631 'url': 'http://www.bbc.com/news/world-europe-32041533',
635 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
636 'description': 'md5:2868290467291b37feda7863f7a83f54',
638 'timestamp': 1427219242,
639 'upload_date': '20150324',
643 'skip_download': True,
646 # article with single video embedded with data-playable containing XML playlist
647 # with direct video links as progressiveDownloadUrl (for now these are extracted)
648 # and playlist with f4m and m3u8 as streamingUrl
649 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
651 'id': '150615_telabyad_kentin_cogu',
653 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
654 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
655 'timestamp': 1434397334,
656 'upload_date': '20150615',
659 'skip_download': True,
662 # single video embedded with data-playable containing XML playlists (regional section)
663 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
665 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
667 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
668 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
669 'timestamp': 1434713142,
670 'upload_date': '20150619',
673 'skip_download': True,
676 # single video from video playlist embedded with vxp-playlist-data JSON
677 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
681 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
683 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
686 'skip_download': True,
689 # single video story with digitalData
690 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
694 'title': 'Sri Lanka’s spicy secret',
695 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
696 'timestamp': 1437674293,
697 'upload_date': '20150723',
701 'skip_download': True,
704 # single video story without digitalData
705 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
709 'title': 'Hyundai Santa Fe Sport: Rock star',
710 'description': 'md5:b042a26142c4154a6e472933cf20793d',
711 'timestamp': 1415867444,
712 'upload_date': '20141113',
716 'skip_download': True,
719 # single video embedded with Morph
720 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
724 'title': "Nigeria v Japan - Men's First Round",
725 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
727 'uploader': 'BBC Sport',
728 'uploader_id': 'bbc_sport',
732 'skip_download': True,
734 'skip': 'Georestricted to UK',
736 # single video with playlist.sxml URL in playlist param
737 'url': 'http://www.bbc.com/sport/0/football/33653409',
741 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
742 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
747 'skip_download': True,
750 # article with multiple videos embedded with playlist.sxml in playlist param
751 'url': 'http://www.bbc.com/sport/0/football/34475836',
754 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
755 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
759 # school report article with single video
760 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
763 'title': 'School which breaks down barriers in Jerusalem',
767 # single video with playlist URL from weather section
768 'url': 'http://www.bbc.com/weather/features/33601775',
769 'only_matching': True,
771 # custom redirection to www.bbc.com
772 # also, video with window.__INITIAL_DATA__
773 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
777 'title': "Pluto may have 'nitrogen glaciers'",
778 'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
779 'thumbnail': r
're:https?://.+/.+\.jpg',
780 'timestamp': 1437785037,
781 'upload_date': '20150725',
784 # video with window.__INITIAL_DATA__ and value as JSON string
785 'url': 'https://www.bbc.com/news/av/world-europe-59468682',
789 'title': 'Why France is making this woman a national hero',
790 'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
791 'thumbnail': r
're:https?://.+/.+\.jpg',
792 'timestamp': 1638230731,
793 'upload_date': '20211130',
796 # single video article embedded with data-media-vpid
797 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
798 'only_matching': True,
801 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
805 'title': 'Things Not To Say to people that live on council estates',
806 'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
808 'thumbnail': r
're:https?://.+/.+\.jpg',
811 # window.__PRELOADED_STATE__
812 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
816 'title': 'Prom 6: An American in Paris and Turangalila',
817 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
818 'uploader': 'Radio 3',
819 'uploader_id': 'bbc_radio_three',
822 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
826 'title': 'md5:2fabf12a726603193a2879a055f72514',
827 'description': 'Learn English words and phrases from this story',
829 'add_ie': [BBCCoUkIE
.ie_key()],
832 'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
836 'title': 'How positive thinking is harming your happiness',
837 'alt_title': 'The downsides of positive thinking',
838 'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
840 'thumbnail': r
're:https?://.+/p07c9dsr.jpg',
841 'upload_date': '20190604',
842 'categories': ['Psychology'],
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Returns ``False`` as soon as any of the more specific BBC extractors
    listed in ``EXCLUDE_IE`` reports the URL as suitable; otherwise the
    decision is left to the parent class implementation.

    NOTE(review): the first parameter is ``cls`` and ``super(BBCIE, cls)``
    is used, so this is presumably decorated with ``@classmethod`` on a
    line that is not visible here -- confirm.
    """
    EXCLUDE_IE = (
        BBCCoUkIE,
        BBCCoUkArticleIE,
        BBCCoUkIPlayerEpisodesIE,
        BBCCoUkIPlayerGroupIE,
        BBCCoUkPlaylistIE,
    )
    return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
            else super(BBCIE, cls).suitable(url))
852 def _extract_from_media_meta(self
, media_meta
, video_id
):
853 # Direct links to media in media metadata (e.g.
854 # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
855 # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
856 source_files
= media_meta
.get('sourceFiles')
860 'format_id': format_id
,
861 'ext': f
.get('encoding'),
862 'tbr': float_or_none(f
.get('bitrate'), 1000),
863 'filesize': int_or_none(f
.get('filesize')),
864 } for format_id
, f
in source_files
.items() if f
.get('url')], []
866 programme_id
= media_meta
.get('externalId')
868 return self
._download
_media
_selector
(programme_id
)
870 # Process playlist.sxml as legacy playlist
871 href
= media_meta
.get('href')
873 playlist
= self
._download
_legacy
_playlist
_url
(href
)
874 _
, _
, _
, _
, formats
, subtitles
= self
._extract
_from
_legacy
_playlist
(playlist
, video_id
)
875 return formats
, subtitles
879 def _extract_from_playlist_sxml(self
, url
, playlist_id
, timestamp
):
880 programme_id
, title
, description
, duration
, formats
, subtitles
= \
881 self
._process
_legacy
_playlist
_url
(url
, playlist_id
)
882 self
._sort
_formats
(formats
)
886 'description': description
,
887 'duration': duration
,
888 'timestamp': timestamp
,
890 'subtitles': subtitles
,
893 def _real_extract(self
, url
):
894 playlist_id
= self
._match
_id
(url
)
896 webpage
= self
._download
_webpage
(url
, playlist_id
)
898 json_ld_info
= self
._search
_json
_ld
(webpage
, playlist_id
, default
={})
899 timestamp
= json_ld_info
.get('timestamp')
901 playlist_title
= json_ld_info
.get('title')
902 if not playlist_title
:
903 playlist_title
= (self
._og
_search
_title
(webpage
, default
=None)
904 or self
._html
_extract
_title
(webpage
, 'playlist title', default
=None))
906 playlist_title
= re
.sub(r
'(.+)\s*-\s*BBC.*?$', r
'\1', playlist_title
).strip()
908 playlist_description
= json_ld_info
.get(
909 'description') or self
._og
_search
_description
(webpage
, default
=None)
912 timestamp
= parse_iso8601(self
._search
_regex
(
913 [r
'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
914 r
'itemprop="datePublished"[^>]+datetime="([^"]+)"',
915 r
'"datePublished":\s*"([^"]+)'],
916 webpage
, 'date', default
=None))
920 # article with multiple videos embedded with playlist.sxml (e.g.
921 # http://www.bbc.com/sport/0/football/34475836)
922 playlists
= re
.findall(r
'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage
)
923 playlists
.extend(re
.findall(r
'data-media-id="([^"]+/playlist\.sxml)"', webpage
))
926 self
._extract
_from
_playlist
_sxml
(playlist_url
, playlist_id
, timestamp
)
927 for playlist_url
in playlists
]
929 # news article with multiple videos embedded with data-playable
930 data_playables
= re
.findall(r
'data-playable=(["\'])({.+?}
)\
1', webpage)
932 for _, data_playable_json in data_playables:
933 data_playable = self._parse_json(
934 unescapeHTML(data_playable_json), playlist_id, fatal=False)
935 if not data_playable:
937 settings = data_playable.get('settings
', {})
939 # data-playable with video vpid in settings.playlistObject.items (e.g.
940 # http://www.bbc.com/news/world-us-canada-34473351)
941 playlist_object = settings.get('playlistObject
', {})
943 items = playlist_object.get('items
')
944 if items and isinstance(items, list):
945 title = playlist_object['title
']
946 description = playlist_object.get('summary
')
947 duration = int_or_none(items[0].get('duration
'))
948 programme_id = items[0].get('vpid
')
949 formats, subtitles = self._download_media_selector(programme_id)
950 self._sort_formats(formats)
954 'description
': description,
955 'timestamp
': timestamp,
956 'duration
': duration,
958 'subtitles
': subtitles,
# data-playable without vpid but with playlist.sxml URLs
# in otherSettings.playlist (e.g.
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
964 playlist = data_playable.get('otherSettings
', {}).get('playlist', {})
967 for key in ('streaming
', 'progressiveDownload
'):
968 playlist_url = playlist.get('%sUrl
' % key)
972 info = self._extract_from_playlist_sxml(
973 playlist_url, playlist_id, timestamp)
977 entry['title
'] = info['title
']
978 entry['formats
'].extend(info['formats
'])
979 except ExtractorError as e:
# Some playlist URLs may fail with 500 while, at the same time,
# the other one works fine (e.g.
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
983 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
987 self._sort_formats(entry['formats
'])
988 entries.append(entry)
991 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
993 # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
994 group_id = self._search_regex(
995 r'<div
[^
>]+\bclass
=["\']video["\'][^
>]+\bdata
-pid
=["\'](%s)' % self._ID_REGEX,
996 webpage, 'group id', default=None)
998 return self.url_result(
999 'https://www.bbc.co.uk/programmes/%s' % group_id,
1000 ie=BBCCoUkIE.ie_key())
1002 # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
1003 programme_id = self._search_regex(
1004 [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
1005 r'<param[^>]+name="externalIdentifier
"[^>]+value="(%s)"' % self._ID_REGEX,
1006 r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
1007 webpage, 'vpid', default=None)
1010 formats, subtitles = self._download_media_selector(programme_id)
1011 self._sort_formats(formats)
1012 # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
1013 digital_data = self._parse_json(
1015 r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
1016 programme_id, fatal=False)
1017 page_info = digital_data.get('page', {}).get('pageInfo', {})
1018 title = page_info.get('pageName') or self._og_search_title(webpage)
1019 description = page_info.get('description') or self._og_search_description(webpage)
1020 timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
1024 'description': description,
1025 'timestamp': timestamp,
1027 'subtitles': subtitles,
1030 # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
1031 initial_data = self._parse_json(self._html_search_regex(
1032 r'<script[^>]+id=(["\'])initial
-data\
1[^
>]+data
-json
=(["\'])(?P<json>(?:(?!\2).)+)',
1033 webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
1035 init_data = try_get(
1036 initial_data, lambda x: x['initData']['items'][0], dict) or {}
1037 smp_data = init_data.get('smpData') or {}
1038 clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
1039 version_id = clip_data.get('versionID')
1041 title = smp_data['title']
1042 formats, subtitles = self._download_media_selector(version_id)
1043 self._sort_formats(formats)
1044 image_url = smp_data.get('holdingImageURL')
1045 display_date = init_data.get('displayDate')
1046 topic_title = init_data.get('topicTitle')
1052 'alt_title': init_data.get('shortTitle'),
1053 'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
1054 'description': smp_data.get('summary') or init_data.get('shortSummary'),
1055 'upload_date': display_date.replace('-', '') if display_date else None,
1056 'subtitles': subtitles,
1057 'duration': int_or_none(clip_data.get('duration')),
1058 'categories': [topic_title] if topic_title else None,
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
# Several setPayload calls may be present, but the video
# always seems to be related to the first one
1064 morph_payload = self._parse_json(
1066 r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
1067 webpage, 'morph payload', default='{}'),
1068 playlist_id, fatal=False)
1070 components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
1071 for component in components:
1072 if not isinstance(component, dict):
1074 lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
1077 identifiers = lead_media.get('identifiers')
1078 if not identifiers or not isinstance(identifiers, dict):
1080 programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
1081 if not programme_id:
1083 title = lead_media.get('title') or self._og_search_title(webpage)
1084 formats, subtitles = self._download_media_selector(programme_id)
1085 self._sort_formats(formats)
1086 description = lead_media.get('summary')
1087 uploader = lead_media.get('masterBrand')
1088 uploader_id = lead_media.get('mid')
1090 duration_d = lead_media.get('duration')
1091 if isinstance(duration_d, dict):
1092 duration = parse_duration(dict_get(
1093 duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
1097 'description': description,
1098 'duration': duration,
1099 'uploader': uploader,
1100 'uploader_id': uploader_id,
1102 'subtitles': subtitles,
1105 preload_state = self._parse_json(self._search_regex(
1106 r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
1107 'preload state', default='{}'), playlist_id, fatal=False)
1109 current_programme = preload_state.get('programmes', {}).get('current') or {}
1110 programme_id = current_programme.get('id')
1111 if current_programme and programme_id and current_programme.get('type') == 'playable_item':
1112 title = current_programme.get('titles', {}).get('tertiary') or playlist_title
1113 formats, subtitles = self._download_media_selector(programme_id)
1114 self._sort_formats(formats)
1115 synopses = current_programme.get('synopses') or {}
1116 network = current_programme.get('network') or {}
1117 duration = int_or_none(
1118 current_programme.get('duration', {}).get('value'))
1120 image_url = current_programme.get('image_url')
1122 thumbnail = image_url.replace('{recipe}', 'raw')
1126 'description': dict_get(synopses, ('long', 'medium', 'short')),
1127 'thumbnail': thumbnail,
1128 'duration': duration,
1129 'uploader': network.get('short_title'),
1130 'uploader_id': network.get('id'),
1132 'subtitles': subtitles,
1135 bbc3_config = self._parse_json(
1137 r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
1138 'bbcthree config', default='{}'),
1139 playlist_id, transform_source=js_to_json, fatal=False) or {}
1140 payload = bbc3_config.get('payload') or {}
1142 clip = payload.get('currentClip') or {}
1143 clip_vpid = clip.get('vpid')
1144 clip_title = clip.get('title')
1145 if clip_vpid and clip_title:
1146 formats, subtitles = self._download_media_selector(clip_vpid)
1147 self._sort_formats(formats)
1150 'title': clip_title,
1151 'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
1152 'description': clip.get('description'),
1153 'duration': parse_duration(clip.get('duration')),
1155 'subtitles': subtitles,
1157 bbc3_playlist = try_get(
1158 payload, lambda x: x['content']['bbcMedia']['playlist'],
1161 playlist_title = bbc3_playlist.get('title') or playlist_title
1162 thumbnail = bbc3_playlist.get('holdingImageURL')
1164 for bbc3_item in bbc3_playlist['items']:
1165 programme_id = bbc3_item.get('versionID')
1166 if not programme_id:
1168 formats, subtitles = self._download_media_selector(programme_id)
1169 self._sort_formats(formats)
1172 'title': playlist_title,
1173 'thumbnail': thumbnail,
1174 'timestamp': timestamp,
1176 'subtitles': subtitles,
1178 return self.playlist_result(
1179 entries, playlist_id, playlist_title, playlist_description)
1181 initial_data = self._search_regex(
1182 r'window\.__INITIAL_DATA__\s*=\s*("{.+?}
")\s*;', webpage,
1183 'quoted preload state', default=None)
1184 if initial_data is None:
1185 initial_data = self._search_regex(
1186 r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
1187 'preload state', default={})
1189 initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
1190 initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
1192 def parse_media(media):
1195 for item in (try_get(media, lambda x: x['media']['items'], list) or []):
1196 item_id = item.get('id')
1197 item_title = item.get('title')
1198 if not (item_id and item_title):
1200 formats, subtitles = self._download_media_selector(item_id)
1201 self._sort_formats(formats)
1203 blocks = try_get(media, lambda x: x['summary']['blocks'], list)
1206 for block in blocks:
1207 text = try_get(block, lambda x: x['model']['text'], compat_str)
1209 summary.append(text)
1211 item_desc = '\n\n'.join(summary)
1213 for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
1214 if try_get(meta, lambda x: x['label']) == 'Published':
1215 item_time = unified_timestamp(meta.get('timestamp'))
1219 'title': item_title,
1220 'thumbnail': item.get('holdingImageUrl'),
1222 'subtitles': subtitles,
1223 'timestamp': item_time,
1224 'description': strip_or_none(item_desc),
1226 for resp in (initial_data.get('data') or {}).values():
1227 name = resp.get('name')
1228 if name == 'media-experience':
1229 parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
1230 elif name == 'article':
1231 for block in (try_get(resp,
1232 (lambda x: x['data']['blocks'],
1233 lambda x: x['data']['content']['model']['blocks'],),
1235 if block.get('type') not in ['media', 'video']:
1237 parse_media(block.get('model'))
1238 return self.playlist_result(
1239 entries, playlist_id, playlist_title, playlist_description)
1241 def extract_all(pattern):
1242 return list(filter(None, map(
1243 lambda s: self._parse_json(s, playlist_id, fatal=False),
1244 re.findall(pattern, webpage))))
1246 # Multiple video article (e.g.
1247 # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
1248 EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?
' % self._ID_REGEX
1250 for match in extract_all(r'new\s
+SMP\
(({.+?}
)\
)'):
1251 embed_url = match.get('playerSettings
', {}).get('externalEmbedUrl
')
1252 if embed_url and re.match(EMBED_URL, embed_url):
1253 entries.append(embed_url)
1254 entries.extend(re.findall(
1255 r'setPlaylist\
("(%s)"\
)' % EMBED_URL, webpage))
1257 return self.playlist_result(
1258 [self.url_result(entry_, 'BBCCoUk
') for entry_ in entries],
1259 playlist_id, playlist_title, playlist_description)
1261 # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
1262 medias = extract_all(r"data-media-meta='({[^']+}
)'")
1265 # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
1266 media_asset = self._search_regex(
1267 r'mediaAssetPage\
.init\
(\s
*({.+?}
), "/',
1268 webpage, 'media asset', default=None)
1270 media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
1272 for video in media_asset_page.get('videos', {}).values():
1273 medias.extend(video.values())
1276 # Multiple video playlist with single `now playing` entry (e.g.
1277 # http://www.bbc.com/news/video_and_audio/must_see/33767813)
1278 vxp_playlist = self._parse_json(
1280 r'<script[^>]+class="vxp
-playlist
-data
"[^>]+type="application
/json
"[^>]*>([^<]+)</script>',
1281 webpage, 'playlist data'),
1283 playlist_medias = []
1284 for item in vxp_playlist:
1285 media = item.get('media')
1288 playlist_medias.append(media)
1289 # Download single video if found media with asset id matching the video id from URL
1290 if item.get('advert', {}).get('assetId') == playlist_id:
1293 # Fallback to the whole playlist
1295 medias = playlist_medias
1298 for num, media_meta in enumerate(medias, start=1):
1299 formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
1300 if not formats and not self.get_param('ignore_no_formats'):
1302 self._sort_formats(formats)
1304 video_id = media_meta.get('externalId')
1306 video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
1308 title = media_meta.get('caption')
1310 title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
1312 duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
1315 for image in media_meta.get('images', {}).values():
1316 images.extend(image.values())
1317 if 'image' in media_meta:
1318 images.append(media_meta['image'])
1321 'url': image.get('href'),
1322 'width': int_or_none(image.get('width')),
1323 'height': int_or_none(image.get('height')),
1324 } for image in images]
1329 'thumbnails': thumbnails,
1330 'duration': duration,
1331 'timestamp': timestamp,
1333 'subtitles': subtitles,
1336 return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
class BBCCoUkArticleIE(InfoExtractor):
    # Handles bbc.co.uk programme article pages and exposes the clips
    # referenced by the article markup as a playlist.
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Download the article page and return its clips as a playlist.

        The playlist id is the article id matched from ``url``; title and
        description come from the page's Open Graph metadata.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # NOTE(review): the description lookup presumably yields None when
        # the page carries no og:description, so only strip an actual string.
        description = self._og_search_description(webpage)
        if description:
            description = description.strip()

        # Every <div typeof="Clip" resource="..."> points at one clip URL
        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    # Shared pagination logic for programme listing pages.  Subclasses are
    # expected to provide:
    #   _VIDEO_ID_TEMPLATE  regex with one %s slot (filled with
    #                       BBCCoUkIE._ID_REGEX) whose group captures a video id
    #   _URL_TEMPLATE       %-template turning a video id into a programme URL
    #   _extract_title_and_description(webpage) -> (title, description)

    def _entries(self, webpage, url, playlist_id):
        """Yield a url result for every video id found on the listing.

        When ``url`` carries an explicit ``page`` query parameter only the
        already downloaded ``webpage`` is scanned; otherwise the "next"
        pagination link is followed until there is none left.
        """
        single_page = 'page' in compat_urlparse.parse_qs(
            compat_urlparse.urlparse(url).query)
        # The first page is already in hand, so numbering of the pages that
        # still have to be downloaded starts at 2.
        for page_num in itertools.count(2):
            for video_id in re.findall(
                    self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
            if single_page:
                # An explicit page was requested: do not walk the pagination
                return
            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page:
                # Last page reached
                break
            # The page number used to be passed positionally right after the
            # note, i.e. in the slot that appears to be the error note; spell
            # the error note out by keyword instead.
            webpage = self._download_webpage(
                compat_urlparse.urljoin(url, next_page), playlist_id,
                'Downloading page %d' % page_num,
                errnote='Unable to download page %d' % page_num)

    def _real_extract(self, url):
        """Return the playlist made of all entries found for ``url``."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title, description = self._extract_title_and_description(webpage)

        return self.playlist_result(
            self._entries(webpage, url, playlist_id),
            playlist_id, title, description)
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    # Common driver for iPlayer episode listings.  Subclasses are expected
    # to provide:
    #   _PAGE_SIZE          items requested per API call when the URL names
    #                       no explicit page
    #   _DESCRIPTION_KEY    key of the synopsis mapping in the API data
    #   _call_api(pid, per_page, page=1, series_id=None)
    #   _get_elements(data), _get_episode(element)
    #   _get_episode_image(episode), _get_episode_field(episode, field)
    #   _get_playlist_data(data), _get_playlist_title(data)

    # The doubled %% keeps one %s slot for the listing kind after the id
    # regex has been substituted here.
    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Return ``episode[key][default_key]`` through try_get."""
        return try_get(episode, lambda x: x[key][default_key])

    def _get_description(self, data):
        """Return the longest available synopsis stored under _DESCRIPTION_KEY."""
        synopsis = data.get(self._DESCRIPTION_KEY) or {}
        return dict_get(synopsis, ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield url-type entries for one page of episodes.

        ``page`` is zero-based; the API is called with ``page + 1``.
        """
        elements = self._get_elements(self._call_api(
            programme_id, per_page, page + 1, series_id))
        for element in elements:
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                # No id means no episode URL can be built below
                continue
            thumbnail = None
            image = self._get_episode_image(episode)
            if image:
                thumbnail = image.replace('{recipe}', 'raw')
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                'thumbnail': thumbnail,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the playlist for the programme/group id found in ``url``.

        A ``page`` query parameter restricts the result to that single page
        (36 items per page); otherwise all pages are fetched on demand.
        ``seriesId`` is forwarded to the API call.
        """
        pid = self._match_id(url)
        # The query mapping was read below without ever being assigned
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        series_id = qs.get('seriesId', [None])[0]
        page = qs.get('page', [None])[0]
        per_page = 36 if page else self._PAGE_SIZE
        fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
        entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
        # A one-item request is enough to read the playlist level metadata
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        return self.playlist_result(
            entries, pid, self._get_playlist_title(playlist_data),
            self._get_description(playlist_data))
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "episodes" listings, backed by the graph.ibl.api.bbc.co.uk
    # endpoint called in _call_api.
    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    # Items requested per API call when the URL names no explicit page
    # (read by the base class).
    # NOTE(review): the value is not visible in the reviewed chunk - confirm.
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the episode's default image URL template, if any."""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the ``default`` variant of ``field`` for the episode."""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the result list of a programme API response."""
        return data['entities']['results']

    @staticmethod
    def _get_episode(element):
        """Return the episode mapping of a result element (empty if absent)."""
        return element.get('episode') or {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST the stored query and return its ``data.programme`` part.

        ``series_id``, when given, is sent as the ``sliceId`` variable.
        """
        variables = {
            'id': pid,
            'page': page,
            'perPage': per_page,
        }
        if series_id:
            variables['sliceId'] = series_id
        return self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid, headers={
                'Content-Type': 'application/json'
            }, data=json.dumps({
                'id': '5692d93d5aac8d796a0305e895e61551',
                'variables': variables,
            }).encode('utf-8'))['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """Return the mapping holding the playlist level metadata."""
        # The programme mapping itself carries the title and synopsis
        return data

    def _get_playlist_title(self, data):
        """Return the default title of the programme."""
        return self._get_default(data, 'title')
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "group" listings, backed by the ibl.api.bbc.co.uk v1 groups
    # endpoint called in _call_api.
    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    # Items requested per API call when the URL names no explicit page
    # (read by the base class).
    # NOTE(review): the value is not visible in the reviewed chunk - confirm.
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the episode's standard image URL template, if any."""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Return ``field`` straight from the episode mapping."""
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        """Return the element list of a group API response."""
        return data['elements']

    @staticmethod
    def _get_episode(element):
        """Return the episode mapping for a listing element."""
        # Group elements are used as the episode mappings directly
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of the group and return its ``group_episodes`` part.

        ``series_id`` is accepted for signature parity with the sibling
        extractor and is not sent to this endpoint.
        """
        return self._download_json(
            'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
            pid, query={
                'page': page,
                'per_page': per_page,
            })['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Return the ``group`` mapping holding the playlist level metadata."""
        return data['group']

    def _get_playlist_title(self, data):
        """Return the title of the group."""
        return data.get('title')
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    # Programme listing pages (episodes / broadcasts / clips) on bbc.co.uk;
    # the pagination itself is handled by the base class.
    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    # Turns a video id found on the listing into a programme URL
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    # The %s slot is filled with BBCCoUkIE._ID_REGEX by the base class
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return ``(title, description)`` from the page's Open Graph tags.

        The title lookup is non-fatal, so a page without an og:title does
        not abort the extraction.
        """
        title = self._og_search_title(webpage, fatal=False)
        description = self._og_search_description(webpage)
        return title, description