yt_dlp/extractor/bbc.py

   1 import functools
   2 import itertools
   3 import json
   4 import re
   5 import urllib.parse
   6 import xml.etree.ElementTree
   7
   8 from .common import InfoExtractor
   9 from ..networking.exceptions import HTTPError
  10 from ..utils import (
  11     ExtractorError,
  12     OnDemandPagedList,
  13     clean_html,
  14     dict_get,
  15     float_or_none,
  16     get_element_by_class,
  17     int_or_none,
  18     join_nonempty,
  19     js_to_json,
  20     parse_duration,
  21     parse_iso8601,
  22     parse_qs,
  23     strip_or_none,
  24     traverse_obj,
  25     try_get,
  26     unescapeHTML,
  27     unified_timestamp,
  28     url_or_none,
  29     urlencode_postdata,
  30     urljoin,
  31 )
  32
  33
class BBCCoUkIE(InfoExtractor):
    IE_NAME = 'bbc.co.uk'
    IE_DESC = 'BBC iPlayer'
    # Identifier pattern: one of the letters p, b, m or l followed by 7 lowercase
    # alphanumerics, or "w" followed by 7 to 14 lowercase alphanumerics
    _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
    # The negative lookahead after the id keeps URLs that continue with
    # /episodes, /broadcasts or /clips from matching
    _VALID_URL = rf'''(?x)
                    https?://
                        (?:www\.)?bbc\.co\.uk/
                        (?:
                            programmes/(?!articles/)|
                            iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
                            music/(?:clips|audiovideo/popular)[/#]|
                            radio/player/|
                            events/[^/]+/play/[^/]+/
                        )
                        (?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips))
                    '''
    _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']

    # Sign-in page; also used as the Referer and default POST target in _perform_login
    _LOGIN_URL = 'https://account.bbc.com/signin'
    _NETRC_MACHINE = 'bbc'

    # Filled with (media set, vpid) by _download_media_selector
    _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
    # Media sets are queried in the order listed here
    _MEDIA_SETS = [
        # Provides HQ HLS streams with even better quality than the pc mediaset but fails
        # with geolocation in some cases even when it's not geo restricted at all (e.g.
        # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
        'iptv-all',
        'pc',
    ]

    # XML namespace of the elements looked up in legacy (EMP) playlists
    _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
  65
  66     _TESTS = [
  67         {
  68             'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
  69             'info_dict': {
  70                 'id': 'b039d07m',
  71                 'ext': 'flv',
  72                 'title': 'Kaleidoscope, Leonard Cohen',
  73                 'description': 'The Canadian poet and songwriter reflects on his musical career.',
  74             },
  75             'params': {
  76                 # rtmp download
  77                 'skip_download': True,
  78             },
  79         },
  80         {
  81             'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
  82             'info_dict': {
  83                 'id': 'b00yng1d',
  84                 'ext': 'flv',
  85                 'title': 'The Man in Black: Series 3: The Printed Name',
  86                 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
  87                 'duration': 1800,
  88             },
  89             'params': {
  90                 # rtmp download
  91                 'skip_download': True,
  92             },
  93             'skip': 'Episode is no longer available on BBC iPlayer Radio',
  94         },
  95         {
  96             'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
  97             'info_dict': {
  98                 'id': 'b00yng1d',
  99                 'ext': 'flv',
 100                 'title': 'The Voice UK: Series 3: Blind Auditions 5',
 101                 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
 102                 'duration': 5100,
 103             },
 104             'params': {
 105                 # rtmp download
 106                 'skip_download': True,
 107             },
 108             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
 109         },
 110         {
 111             'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
 112             'info_dict': {
 113                 'id': 'b03k3pb7',
 114                 'ext': 'flv',
 115                 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
 116                 'description': '2. Invasion',
 117                 'duration': 3600,
 118             },
 119             'params': {
 120                 # rtmp download
 121                 'skip_download': True,
 122             },
 123             'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
 124         }, {
 125             'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
 126             'info_dict': {
 127                 'id': 'b04v209v',
 128                 'ext': 'flv',
 129                 'title': 'Pete Tong, The Essential New Tune Special',
 130                 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
 131                 'duration': 10800,
 132             },
 133             'params': {
 134                 # rtmp download
 135                 'skip_download': True,
 136             },
 137             'skip': 'Episode is no longer available on BBC iPlayer Radio',
 138         }, {
 139             'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
 140             'note': 'Audio',
 141             'info_dict': {
 142                 'id': 'p022h44j',
 143                 'ext': 'flv',
 144                 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
 145                 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
 146                 'duration': 227,
 147             },
 148             'params': {
 149                 # rtmp download
 150                 'skip_download': True,
 151             },
 152         }, {
 153             'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
 154             'note': 'Video',
 155             'info_dict': {
 156                 'id': 'p025c103',
 157                 'ext': 'flv',
 158                 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
 159                 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
 160                 'duration': 226,
 161             },
 162             'params': {
 163                 # rtmp download
 164                 'skip_download': True,
 165             },
 166         }, {
 167             'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
 168             'info_dict': {
 169                 'id': 'p02n76xf',
 170                 'ext': 'flv',
 171                 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
 172                 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
 173                 'duration': 3540,
 174             },
 175             'params': {
 176                 # rtmp download
 177                 'skip_download': True,
 178             },
 179             'skip': 'geolocation',
 180         }, {
 181             'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
 182             'info_dict': {
 183                 'id': 'b05zmgw1',
 184                 'ext': 'flv',
 185                 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
 186                 'title': 'Royal Academy Summer Exhibition',
 187                 'duration': 3540,
 188             },
 189             'params': {
 190                 # rtmp download
 191                 'skip_download': True,
 192             },
 193             'skip': 'geolocation',
 194         }, {
 195             # iptv-all mediaset fails with geolocation however there is no geo restriction
 196             # for this programme at all
 197             'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
 198             'info_dict': {
 199                 'id': 'b06rkms3',
 200                 'ext': 'flv',
 201                 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
 202                 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
 203             },
 204             'params': {
 205                 # rtmp download
 206                 'skip_download': True,
 207             },
 208             'skip': 'Now it\'s really geo-restricted',
 209         }, {
 210             # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
 211             'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
 212             'info_dict': {
 213                 'id': 'p028bfkj',
 214                 'ext': 'flv',
 215                 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
 216                 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
 217             },
 218             'params': {
 219                 # rtmp download
 220                 'skip_download': True,
 221             },
 222         }, {
 223             'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
 224             'only_matching': True,
 225         }, {
 226             'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
 227             'only_matching': True,
 228         }, {
 229             'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
 230             'only_matching': True,
 231         }, {
 232             'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
 233             'only_matching': True,
 234         }, {
 235             'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
 236             'only_matching': True,
 237         }, {
 238             'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
 239             'only_matching': True,
 240         }, {
 241             'url': 'https://www.bbc.co.uk/programmes/m00005xn',
 242             'only_matching': True,
 243         }, {
 244             'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
 245             'only_matching': True,
 246         }]
 247
 248     def _perform_login(self, username, password):
 249         login_page = self._download_webpage(
 250             self._LOGIN_URL, None, 'Downloading signin page')
 251
 252         login_form = self._hidden_inputs(login_page)
 253
 254         login_form.update({
 255             'username': username,
 256             'password': password,
 257         })
 258
 259         post_url = urljoin(self._LOGIN_URL, self._search_regex(
 260             r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
 261             'post url', default=self._LOGIN_URL, group='url'))
 262
 263         response, urlh = self._download_webpage_handle(
 264             post_url, None, 'Logging in', data=urlencode_postdata(login_form),
 265             headers={'Referer': self._LOGIN_URL})
 266
 267         if self._LOGIN_URL in urlh.url:
 268             error = clean_html(get_element_by_class('form-message', response))
 269             if error:
 270                 raise ExtractorError(
 271                     f'Unable to login: {error}', expected=True)
 272             raise ExtractorError('Unable to log in')
 273
 274     class MediaSelectionError(Exception):
 275         def __init__(self, error_id):
 276             self.id = error_id
 277
 278     def _extract_asx_playlist(self, connection, programme_id):
 279         asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
 280         return [ref.get('href') for ref in asx.findall('./Entry/ref')]
 281
 282     def _extract_items(self, playlist):
 283         return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item')
 284
 285     def _extract_medias(self, media_selection):
 286         error = media_selection.get('result')
 287         if error:
 288             raise BBCCoUkIE.MediaSelectionError(error)
 289         return media_selection.get('media') or []
 290
 291     def _extract_connections(self, media):
 292         return media.get('connection') or []
 293
 294     def _get_subtitles(self, media, programme_id):
 295         subtitles = {}
 296         for connection in self._extract_connections(media):
 297             cc_url = url_or_none(connection.get('href'))
 298             if not cc_url:
 299                 continue
 300             captions = self._download_xml(
 301                 cc_url, programme_id, 'Downloading captions', fatal=False)
 302             if not isinstance(captions, xml.etree.ElementTree.Element):
 303                 continue
 304             subtitles['en'] = [
 305                 {
 306                     'url': connection.get('href'),
 307                     'ext': 'ttml',
 308                 },
 309             ]
 310             break
 311         return subtitles
 312
 313     def _raise_extractor_error(self, media_selection_error):
 314         raise ExtractorError(
 315             f'{self.IE_NAME} returned error: {media_selection_error.id}',
 316             expected=True)
 317
 318     def _download_media_selector(self, programme_id):
 319         last_exception = None
 320         formats, subtitles = [], {}
 321         for media_set in self._MEDIA_SETS:
 322             try:
 323                 fmts, subs = self._download_media_selector_url(
 324                     self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
 325                 formats.extend(fmts)
 326                 if subs:
 327                     self._merge_subtitles(subs, target=subtitles)
 328             except BBCCoUkIE.MediaSelectionError as e:
 329                 if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
 330                     last_exception = e
 331                     continue
 332                 self._raise_extractor_error(e)
 333         if last_exception:
 334             if formats or subtitles:
 335                 self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
 336             else:
 337                 self._raise_extractor_error(last_exception)
 338         return formats, subtitles
 339
 340     def _download_media_selector_url(self, url, programme_id=None):
 341         media_selection = self._download_json(
 342             url, programme_id, 'Downloading media selection JSON',
 343             expected_status=(403, 404))
 344         return self._process_media_selector(media_selection, programme_id)
 345
 346     def _process_media_selector(self, media_selection, programme_id):
 347         formats = []
 348         subtitles = None
 349         urls = []
 350
 351         for media in self._extract_medias(media_selection):
 352             kind = media.get('kind')
 353             if kind in ('video', 'audio'):
 354                 bitrate = int_or_none(media.get('bitrate'))
 355                 encoding = media.get('encoding')
 356                 width = int_or_none(media.get('width'))
 357                 height = int_or_none(media.get('height'))
 358                 file_size = int_or_none(media.get('media_file_size'))
 359                 for connection in self._extract_connections(media):
 360                     href = connection.get('href')
 361                     if href in urls:
 362                         continue
 363                     if href:
 364                         urls.append(href)
 365                     conn_kind = connection.get('kind')
 366                     protocol = connection.get('protocol')
 367                     supplier = connection.get('supplier')
 368                     transfer_format = connection.get('transferFormat')
 369                     format_id = supplier or conn_kind or protocol
 370                     # ASX playlist
 371                     if supplier == 'asx':
 372                         for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
 373                             formats.append({
 374                                 'url': ref,
 375                                 'format_id': f'ref{i}_{format_id}',
 376                             })
 377                     elif transfer_format == 'dash':
 378                         formats.extend(self._extract_mpd_formats(
 379                             href, programme_id, mpd_id=format_id, fatal=False))
 380                     elif transfer_format == 'hls':
 381                         # TODO: let expected_status be passed into _extract_xxx_formats() instead
 382                         try:
 383                             fmts = self._extract_m3u8_formats(
 384                                 href, programme_id, ext='mp4', entry_protocol='m3u8_native',
 385                                 m3u8_id=format_id, fatal=False)
 386                         except ExtractorError as e:
 387                             if not (isinstance(e.exc_info[1], HTTPError)
 388                                     and e.exc_info[1].status in (403, 404)):
 389                                 raise
 390                             fmts = []
 391                         formats.extend(fmts)
 392                     elif transfer_format == 'hds':
 393                         formats.extend(self._extract_f4m_formats(
 394                             href, programme_id, f4m_id=format_id, fatal=False))
 395                     else:
 396                         if not supplier and bitrate:
 397                             format_id += f'-{bitrate}'
 398                         fmt = {
 399                             'format_id': format_id,
 400                             'filesize': file_size,
 401                         }
 402                         if kind == 'video':
 403                             fmt.update({
 404                                 'width': width,
 405                                 'height': height,
 406                                 'tbr': bitrate,
 407                                 'vcodec': encoding,
 408                             })
 409                         else:
 410                             fmt.update({
 411                                 'abr': bitrate,
 412                                 'acodec': encoding,
 413                                 'vcodec': 'none',
 414                             })
 415                         if protocol in ('http', 'https'):
 416                             # Direct link
 417                             fmt.update({
 418                                 'url': href,
 419                             })
 420                         elif protocol == 'rtmp':
 421                             application = connection.get('application', 'ondemand')
 422                             auth_string = connection.get('authString')
 423                             identifier = connection.get('identifier')
 424                             server = connection.get('server')
 425                             fmt.update({
 426                                 'url': f'{protocol}://{server}/{application}?{auth_string}',
 427                                 'play_path': identifier,
 428                                 'app': f'{application}?{auth_string}',
 429                                 'page_url': 'http://www.bbc.co.uk',
 430                                 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
 431                                 'rtmp_live': False,
 432                                 'ext': 'flv',
 433                             })
 434                         else:
 435                             continue
 436                         formats.append(fmt)
 437             elif kind == 'captions':
 438                 subtitles = self.extract_subtitles(media, programme_id)
 439         return formats, subtitles
 440
 441     def _download_playlist(self, playlist_id):
 442         try:
 443             playlist = self._download_json(
 444                 f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json',
 445                 playlist_id, 'Downloading playlist JSON')
 446             formats = []
 447             subtitles = {}
 448
 449             for version in playlist.get('allAvailableVersions', []):
 450                 smp_config = version['smpConfig']
 451                 title = smp_config['title']
 452                 description = smp_config['summary']
 453                 for item in smp_config['items']:
 454                     kind = item['kind']
 455                     if kind not in ('programme', 'radioProgramme'):
 456                         continue
 457                     programme_id = item.get('vpid')
 458                     duration = int_or_none(item.get('duration'))
 459                     version_formats, version_subtitles = self._download_media_selector(programme_id)
 460                     types = version['types']
 461                     for f in version_formats:
 462                         f['format_note'] = ', '.join(types)
 463                         if any('AudioDescribed' in x for x in types):
 464                             f['language_preference'] = -10
 465                     formats += version_formats
 466                     for tag, subformats in (version_subtitles or {}).items():
 467                         subtitles.setdefault(tag, []).extend(subformats)
 468
 469             return programme_id, title, description, duration, formats, subtitles
 470         except ExtractorError as ee:
 471             if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
 472                 raise
 473
 474         # fallback to legacy playlist
 475         return self._process_legacy_playlist(playlist_id)
 476
 477     def _process_legacy_playlist_url(self, url, display_id):
 478         playlist = self._download_legacy_playlist_url(url, display_id)
 479         return self._extract_from_legacy_playlist(playlist, display_id)
 480
 481     def _process_legacy_playlist(self, playlist_id):
 482         return self._process_legacy_playlist_url(
 483             f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id)
 484
 485     def _download_legacy_playlist_url(self, url, playlist_id=None):
 486         return self._download_xml(
 487             url, playlist_id, 'Downloading legacy playlist XML')
 488
 489     def _extract_from_legacy_playlist(self, playlist, playlist_id):
 490         no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems')
 491         if no_items is not None:
 492             reason = no_items.get('reason')
 493             if reason == 'preAvailability':
 494                 msg = f'Episode {playlist_id} is not yet available'
 495             elif reason == 'postAvailability':
 496                 msg = f'Episode {playlist_id} is no longer available'
 497             elif reason == 'noMedia':
 498                 msg = f'Episode {playlist_id} is not currently available'
 499             else:
 500                 msg = f'Episode {playlist_id} is not available: {reason}'
 501             raise ExtractorError(msg, expected=True)
 502
 503         for item in self._extract_items(playlist):
 504             kind = item.get('kind')
 505             if kind not in ('programme', 'radioProgramme'):
 506                 continue
 507             title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text
 508             description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary')
 509             description = description_el.text if description_el is not None else None
 510
 511             def get_programme_id(item):
 512                 def get_from_attributes(item):
 513                     for p in ('identifier', 'group'):
 514                         value = item.get(p)
 515                         if value and re.match(r'^[pb][\da-z]{7}$', value):
 516                             return value
 517                 get_from_attributes(item)
 518                 mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator')
 519                 if mediator is not None:
 520                     return get_from_attributes(mediator)
 521
 522             programme_id = get_programme_id(item)
 523             duration = int_or_none(item.get('duration'))
 524
 525             if programme_id:
 526                 formats, subtitles = self._download_media_selector(programme_id)
 527             else:
 528                 formats, subtitles = self._process_media_selector(item, playlist_id)
 529                 programme_id = playlist_id
 530
 531         return programme_id, title, description, duration, formats, subtitles
 532
 533     def _real_extract(self, url):
 534         group_id = self._match_id(url)
 535
 536         webpage = self._download_webpage(url, group_id, 'Downloading video page')
 537
 538         error = self._search_regex(
 539             r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
 540             webpage, 'error', default=None)
 541         if error:
 542             raise ExtractorError(error, expected=True)
 543
 544         programme_id = None
 545         duration = None
 546
 547         tviplayer = self._search_regex(
 548             r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
 549             webpage, 'player', default=None)
 550
 551         if tviplayer:
 552             player = self._parse_json(tviplayer, group_id).get('player', {})
 553             duration = int_or_none(player.get('duration'))
 554             programme_id = player.get('vpid')
 555
 556         if not programme_id:
 557             programme_id = self._search_regex(
 558                 rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None)
 559
 560         if programme_id:
 561             formats, subtitles = self._download_media_selector(programme_id)
 562             title = self._og_search_title(webpage, default=None) or self._html_search_regex(
 563                 (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
 564                  r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
 565             description = self._search_regex(
 566                 (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
 567                  r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
 568                 webpage, 'description', default=None)
 569             if not description:
 570                 description = self._html_search_meta('description', webpage)
 571         else:
 572             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
 573
 574         return {
 575             'id': programme_id,
 576             'title': title,
 577             'description': description,
 578             'thumbnail': self._og_search_thumbnail(webpage, default=None),
 579             'duration': duration,
 580             'formats': formats,
 581             'subtitles': subtitles,
 582         }
 583
 584
 585 class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
 586     IE_NAME = 'bbc'
 587     IE_DESC = 'BBC'
 588     _VALID_URL = r'''(?x)
 589         https?://(?:www\.)?(?:
 590             bbc\.(?:com|co\.uk)|
 591             bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
 592             bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
 593         )/(?:[^/]+/)+(?P<id>[^/#?]+)'''
 594
 595     _MEDIA_SETS = [
 596         'pc',
 597         'mobile-tablet-main',
 598     ]
 599
 600     _TESTS = [{
 601         # article with multiple videos embedded with data-playable containing vpids
 602         'url': 'http://www.bbc.com/news/world-europe-32668511',
 603         'info_dict': {
 604             'id': 'world-europe-32668511',
 605             'title': 'Russia stages massive WW2 parade despite Western boycott',
 606             'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
 607         },
 608         'playlist_count': 2,
 609     }, {
 610         # article with multiple videos embedded with data-playable (more videos)
 611         'url': 'http://www.bbc.com/news/business-28299555',
 612         'info_dict': {
 613             'id': 'business-28299555',
 614             'title': 'Farnborough Airshow: Video highlights',
 615             'description': 'BBC reports and video highlights at the Farnborough Airshow.',
 616         },
 617         'playlist_count': 9,
 618         'skip': 'Save time',
 619     }, {
 620         # article with multiple videos embedded with `new SMP()`
 621         # broken
 622         'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
 623         'info_dict': {
 624             'id': '3662a707-0af9-3149-963f-47bea720b460',
 625             'title': 'BUGGER',
 626             'description': r're:BUGGER  The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
 627         },
 628         'playlist_count': 18,
 629     }, {
 630         # single video embedded with data-playable containing vpid
 631         'url': 'http://www.bbc.com/news/world-europe-32041533',
 632         'info_dict': {
 633             'id': 'p02mprgb',
 634             'ext': 'mp4',
 635             'title': 'Germanwings crash site aerial video',
 636             'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
 637             'duration': 47,
 638             'timestamp': 1427219242,
 639             'upload_date': '20150324',
 640             'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
 641         },
 642         'params': {
 643             'skip_download': True,
 644         },
 645     }, {
 646         # article with single video embedded with data-playable containing XML playlist
 647         # with direct video links as progressiveDownloadUrl (for now these are extracted)
 648         # and playlist with f4m and m3u8 as streamingUrl
 649         'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
 650         'info_dict': {
 651             'id': '150615_telabyad_kentin_cogu',
 652             'ext': 'mp4',
 653             'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
 654             'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
 655             'timestamp': 1434397334,
 656             'upload_date': '20150615',
 657         },
 658         'params': {
 659             'skip_download': True,
 660         },
 661         'skip': 'now SIMORGH_DATA with no video',
 662     }, {
 663         # single video embedded with data-playable containing XML playlists (regional section)
 664         'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
 665         'info_dict': {
 666             'id': '39275083',
 667             'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
 668             'ext': 'mp4',
 669             'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
 670             'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
 671             'timestamp': 1434713142,
 672             'upload_date': '20150619',
 673             'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
 674         },
 675         'params': {
 676             'skip_download': True,
 677         },
 678     }, {
 679         # single video from video playlist embedded with vxp-playlist-data JSON
 680         'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
 681         'info_dict': {
 682             'id': 'p02w6qjc',
 683             'ext': 'mp4',
 684             'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
 685             'duration': 56,
 686             'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
 687         },
 688         'params': {
 689             'skip_download': True,
 690         },
 691         'skip': '404 Not Found',
 692     }, {
 693         # single video story with __PWA_PRELOADED_STATE__
 694         'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
 695         'info_dict': {
 696             'id': 'p02q6gc4',
 697             'ext': 'mp4',
 698             'title': 'Tasting the spice of life in Jaffna',
 699             'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
 700             'timestamp': 1646058397,
 701             'upload_date': '20220228',
 702             'duration': 255,
 703             'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
 704         },
 705     }, {
 706         # single video story without digitalData
 707         'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
 708         'info_dict': {
 709             'id': 'p018zqqg',
 710             'ext': 'mp4',
 711             'title': 'Hyundai Santa Fe Sport: Rock star',
 712             'description': 'md5:b042a26142c4154a6e472933cf20793d',
 713             'timestamp': 1415867444,
 714             'upload_date': '20141113',
 715         },
 716         'skip': 'redirects to TopGear home page',
 717     }, {
 718         # single video embedded with Morph
 719         # TODO: replacement test page
 720         'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
 721         'info_dict': {
 722             'id': 'p041vhd0',
 723             'ext': 'mp4',
 724             'title': "Nigeria v Japan - Men's First Round",
 725             'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
 726             'duration': 7980,
 727             'uploader': 'BBC Sport',
 728             'uploader_id': 'bbc_sport',
 729         },
 730         'skip': 'Video no longer in page',
 731     }, {
 732         # single video in __INITIAL_DATA__
 733         'url': 'http://www.bbc.com/sport/0/football/33653409',
 734         'info_dict': {
 735             'id': 'p02xycnp',
 736             'ext': 'mp4',
 737             'title': 'Ronaldo to Man Utd, Arsenal to spend?',
 738             'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
 739             'timestamp': 1437750175,
 740             'upload_date': '20150724',
 741             'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
 742             'duration': 140,
 743         },
 744     }, {
 745         # article with multiple videos embedded with Morph.setPayload
 746         'url': 'http://www.bbc.com/sport/0/football/34475836',
 747         'info_dict': {
 748             'id': '34475836',
 749             'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
 750             'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
 751         },
 752         'playlist_count': 3,
 753     }, {
 754         # Testing noplaylist
 755         'url': 'http://www.bbc.com/sport/0/football/34475836',
 756         'info_dict': {
 757             'id': 'p034ppnv',
 758             'ext': 'mp4',
 759             'title': 'All you need to know about Jurgen Klopp',
 760             'timestamp': 1444335081,
 761             'upload_date': '20151008',
 762             'duration': 122.0,
 763             'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
 764         },
 765         'params': {
 766             'noplaylist': True,
 767         },
 768     }, {
 769         # school report article with single video
 770         'url': 'http://www.bbc.co.uk/schoolreport/35744779',
 771         'info_dict': {
 772             'id': '35744779',
 773             'title': 'School which breaks down barriers in Jerusalem',
 774         },
 775         'playlist_count': 1,
 776         'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
 777     }, {
 778         # single video with playlist URL from weather section
 779         'url': 'http://www.bbc.com/weather/features/33601775',
 780         'only_matching': True,
 781     }, {
 782         # custom redirection to www.bbc.com
 783         # also, video with window.__INITIAL_DATA__
 784         'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
 785         'info_dict': {
 786             'id': 'p02xzws1',
 787             'ext': 'mp4',
 788             'title': "Pluto may have 'nitrogen glaciers'",
 789             'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
 790             'thumbnail': r're:https?://.+/.+\.jpg',
 791             'timestamp': 1437785037,
 792             'upload_date': '20150725',
 793             'duration': 105,
 794         },
 795     }, {
 796         # video with window.__INITIAL_DATA__ and value as JSON string
 797         'url': 'https://www.bbc.com/news/av/world-europe-59468682',
 798         'info_dict': {
 799             'id': 'p0b779gc',
 800             'ext': 'mp4',
 801             'title': 'Why France is making this woman a national hero',
 802             'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
 803             'thumbnail': r're:https?://.+/.+\.jpg',
 804             'timestamp': 1638215626,
 805             'upload_date': '20211129',
 806             'duration': 125,
 807         },
 808     }, {
 809         # video with script id __NEXT_DATA__ and value as JSON string
 810         'url': 'https://www.bbc.com/news/uk-68546268',
 811         'info_dict': {
 812             'id': 'p0hj0lq7',
 813             'ext': 'mp4',
 814             'title': 'Nasser Hospital doctor describes his treatment by IDF',
 815             'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
 816             'thumbnail': r're:https?://.+/.+\.jpg',
 817             'timestamp': 1710188248,
 818             'upload_date': '20240311',
 819             'duration': 104,
 820         },
 821     }, {
 822         # single video article embedded with data-media-vpid
 823         'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
 824         'only_matching': True,
 825     }, {
 826         # bbcthreeConfig
 827         'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
 828         'info_dict': {
 829             'id': 'p06556y7',
 830             'ext': 'mp4',
 831             'title': 'Things Not To Say to people that live on council estates',
 832             'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
 833             'duration': 360,
 834             'thumbnail': r're:https?://.+/.+\.jpg',
 835         },
 836     }, {
 837         # window.__PRELOADED_STATE__
 838         'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
 839         'info_dict': {
 840             'id': 'b0b9z4vz',
 841             'ext': 'mp4',
 842             'title': 'Prom 6: An American in Paris and Turangalila',
 843             'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
 844             'uploader': 'Radio 3',
 845             'uploader_id': 'bbc_radio_three',
 846         },
 847         'skip': '404 Not Found',
 848     }, {
 849         'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
 850         'info_dict': {
 851             'id': 'p06w9tws',
 852             'ext': 'mp4',
 853             'title': 'md5:2fabf12a726603193a2879a055f72514',
 854             'description': 'Learn English words and phrases from this story',
 855             'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
 856         },
 857         'add_ie': [BBCCoUkIE.ie_key()],
 858     }, {
 859         # BBC Reel
 860         'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
 861         'info_dict': {
 862             'id': 'p07c6sb9',
 863             'ext': 'mp4',
 864             'title': 'The downsides of positive thinking',
 865             'description': 'The downsides of positive thinking',
 866             'duration': 235,
 867             'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
 868             'upload_date': '20220223',
 869             'timestamp': 1645632746,
 870         },
 871     }, {
 872         # BBC Sounds
 873         'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
 874         'info_dict': {
 875             'id': 'p0hrw4nr',
 876             'ext': 'mp4',
 877             'title': 'Are our coastlines being washed away?',
 878             'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
 879             'timestamp': 1713556800,
 880             'upload_date': '20240419',
 881             'duration': 1588,
 882             'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
 883             'uploader': 'World Service',
 884             'uploader_id': 'bbc_world_service',
 885             'series': 'CrowdScience',
 886             'chapters': [],
 887         },
 888     }, {  # onion routes
 889         'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
 890         'only_matching': True,
 891     }, {
 892         'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
 893         'only_matching': True,
 894     }]
 895
 896     @classmethod
 897     def suitable(cls, url):
 898         EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
 899         return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
 900                 else super().suitable(url))
 901
 902     def _extract_from_media_meta(self, media_meta, video_id):
 903         # Direct links to media in media metadata (e.g.
 904         # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
 905         # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
 906         source_files = media_meta.get('sourceFiles')
 907         if source_files:
 908             return [{
 909                 'url': f['url'],
 910                 'format_id': format_id,
 911                 'ext': f.get('encoding'),
 912                 'tbr': float_or_none(f.get('bitrate'), 1000),
 913                 'filesize': int_or_none(f.get('filesize')),
 914             } for format_id, f in source_files.items() if f.get('url')], []
 915
 916         programme_id = media_meta.get('externalId')
 917         if programme_id:
 918             return self._download_media_selector(programme_id)
 919
 920         # Process playlist.sxml as legacy playlist
 921         href = media_meta.get('href')
 922         if href:
 923             playlist = self._download_legacy_playlist_url(href)
 924             _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
 925             return formats, subtitles
 926
 927         return [], []
 928
 929     def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
 930         programme_id, title, description, duration, formats, subtitles = \
 931             self._process_legacy_playlist_url(url, playlist_id)
 932         return {
 933             'id': programme_id,
 934             'title': title,
 935             'description': description,
 936             'duration': duration,
 937             'timestamp': timestamp,
 938             'formats': formats,
 939             'subtitles': subtitles,
 940         }
 941
 942     def _real_extract(self, url):
 943         playlist_id = self._match_id(url)
 944
 945         webpage = self._download_webpage(url, playlist_id)
 946
 947         json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
 948         timestamp = json_ld_info.get('timestamp')
 949
 950         playlist_title = json_ld_info.get('title') or re.sub(
 951             r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None
 952
 953         playlist_description = json_ld_info.get(
 954             'description') or self._og_search_description(webpage, default=None)
 955
 956         if not timestamp:
 957             timestamp = parse_iso8601(self._search_regex(
 958                 [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
 959                  r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
 960                  r'"datePublished":\s*"([^"]+)'],
 961                 webpage, 'date', default=None))
 962
 963         entries = []
 964
 965         # article with multiple videos embedded with playlist.sxml (e.g.
 966         # http://www.bbc.com/sport/0/football/34475836)
 967         playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
 968         playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
 969         if playlists:
 970             entries = [
 971                 self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
 972                 for playlist_url in playlists]
 973
 974         # news article with multiple videos embedded with data-playable
 975         data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
 976         if data_playables:
 977             for _, data_playable_json in data_playables:
 978                 data_playable = self._parse_json(
 979                     unescapeHTML(data_playable_json), playlist_id, fatal=False)
 980                 if not data_playable:
 981                     continue
 982                 settings = data_playable.get('settings', {})
 983                 if settings:
 984                     # data-playable with video vpid in settings.playlistObject.items (e.g.
 985                     # http://www.bbc.com/news/world-us-canada-34473351)
 986                     playlist_object = settings.get('playlistObject', {})
 987                     if playlist_object:
 988                         items = playlist_object.get('items')
 989                         if items and isinstance(items, list):
 990                             title = playlist_object['title']
 991                             description = playlist_object.get('summary')
 992                             duration = int_or_none(items[0].get('duration'))
 993                             programme_id = items[0].get('vpid')
 994                             formats, subtitles = self._download_media_selector(programme_id)
 995                             entries.append({
 996                                 'id': programme_id,
 997                                 'title': title,
 998                                 'description': description,
 999                                 'timestamp': timestamp,
1000                                 'duration': duration,
1001                                 'formats': formats,
1002                                 'subtitles': subtitles,
1003                             })
1004                     else:
1005                         # data-playable without vpid but with playlist.sxml URLs
1006                         # in otherSettings.playlist (e.g.
1007                         # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
1008                         playlist = data_playable.get('otherSettings', {}).get('playlist', {})
1009                         if playlist:
1010                             entry = None
1011                             for key in ('streaming', 'progressiveDownload'):
1012                                 playlist_url = playlist.get(f'{key}Url')
1013                                 if not playlist_url:
1014                                     continue
1015                                 try:
1016                                     info = self._extract_from_playlist_sxml(
1017                                         playlist_url, playlist_id, timestamp)
1018                                     if not entry:
1019                                         entry = info
1020                                     else:
1021                                         entry['title'] = info['title']
1022                                         entry['formats'].extend(info['formats'])
1023                                 except ExtractorError as e:
1024                                     # Some playlist URL may fail with 500, at the same time
1025                                     # the other one may work fine (e.g.
1026                                     # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
1027                                     if isinstance(e.cause, HTTPError) and e.cause.status == 500:
1028                                         continue
1029                                     raise
1030                             if entry:
1031                                 entries.append(entry)
1032
1033         if entries:
1034             return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
1035
1036         # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
1037         group_id = self._search_regex(
1038             rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})',
1039             webpage, 'group id', default=None)
1040         if group_id:
1041             return self.url_result(
1042                 f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
1043
1044         # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
1045         programme_id = self._search_regex(
1046             [rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"',
1047              rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"',
1048              rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'],
1049             webpage, 'vpid', default=None)
1050
1051         if programme_id:
1052             formats, subtitles = self._download_media_selector(programme_id)
1053             # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
1054             digital_data = self._parse_json(
1055                 self._search_regex(
1056                     r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
1057                 programme_id, fatal=False)
1058             page_info = digital_data.get('page', {}).get('pageInfo', {})
1059             title = page_info.get('pageName') or self._og_search_title(webpage)
1060             description = page_info.get('description') or self._og_search_description(webpage)
1061             timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
1062             return {
1063                 'id': programme_id,
1064                 'title': title,
1065                 'description': description,
1066                 'timestamp': timestamp,
1067                 'formats': formats,
1068                 'subtitles': subtitles,
1069             }
1070
1071         # bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
1072         initial_data = self._parse_json(self._html_search_regex(
1073             r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
1074             webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
1075         if initial_data:
1076             init_data = try_get(
1077                 initial_data, lambda x: x['initData']['items'][0], dict) or {}
1078             smp_data = init_data.get('smpData') or {}
1079             clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
1080             version_id = clip_data.get('versionID')
1081             if version_id:
1082                 title = smp_data['title']
1083                 formats, subtitles = self._download_media_selector(version_id)
1084                 image_url = smp_data.get('holdingImageURL')
1085                 display_date = init_data.get('displayDate')
1086                 topic_title = init_data.get('topicTitle')
1087
1088                 return {
1089                     'id': version_id,
1090                     'title': title,
1091                     'formats': formats,
1092                     'alt_title': init_data.get('shortTitle'),
1093                     'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
1094                     'description': smp_data.get('summary') or init_data.get('shortSummary'),
1095                     'upload_date': display_date.replace('-', '') if display_date else None,
1096                     'subtitles': subtitles,
1097                     'duration': int_or_none(clip_data.get('duration')),
1098                     'categories': [topic_title] if topic_title else None,
1099                 }
1100
1101         # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
1102         # Several setPayload calls may be present but the video(s)
1103         # should be in one that mentions leadMedia or videoData
1104         morph_payload = self._search_json(
1105             r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
1106             contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
1107             default={})
1108         if morph_payload:
1109             for lead_media in traverse_obj(morph_payload, (
1110                     'body', 'components', ..., 'props', 'leadMedia', {dict})):
1111                 programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
1112                 if not programme_id:
1113                     continue
1114                 formats, subtitles = self._download_media_selector(programme_id)
1115                 return {
1116                     'id': programme_id,
1117                     'title': lead_media.get('title') or self._og_search_title(webpage),
1118                     **traverse_obj(lead_media, {
1119                         'description': ('summary', {str}),
1120                         'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
1121                         'uploader': ('masterBrand', {str}),
1122                         'uploader_id': ('mid', {str}),
1123                     }),
1124                     'formats': formats,
1125                     'subtitles': subtitles,
1126                 }
1127             body = self._parse_json(traverse_obj(morph_payload, (
1128                 'body', 'content', 'article', 'body')), playlist_id, fatal=False)
1129             for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
1130                 if video_data.get('vpid'):
1131                     video_id = video_data['vpid']
1132                     formats, subtitles = self._download_media_selector(video_id)
1133                     entry = {
1134                         'id': video_id,
1135                         'formats': formats,
1136                         'subtitles': subtitles,
1137                     }
1138                 else:
1139                     video_id = video_data['pid']
1140                     entry = self.url_result(
1141                         f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
1142                         video_id, url_transparent=True)
1143                 entry.update({
1144                     'timestamp': traverse_obj(morph_payload, (
1145                         'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}),
1146                     ),
1147                     **traverse_obj(video_data, {
1148                         'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
1149                         'title': (('title', 'caption'), {str}, any),
1150                         'duration': ('duration', {parse_duration}),
1151                     }),
1152                 })
1153                 if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
1154                     return entry
1155                 entries.append(entry)
1156             if entries:
1157                 playlist_title = traverse_obj(morph_payload, (
1158                     'body', 'content', 'article', 'headline', {str})) or playlist_title
1159                 return self.playlist_result(
1160                     entries, playlist_id, playlist_title, playlist_description)
1161
1162         # various PRELOADED_STATE JSON
1163         preload_state = self._search_json(
1164             r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
1165             'preload state', playlist_id, transform_source=js_to_json, default={})
1166         # PRELOADED_STATE with current programme
1167         current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
1168         programme_id = traverse_obj(current_programme, ('id', {str}))
1169         if programme_id and current_programme.get('type') == 'playable_item':
1170             title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
1171             formats, subtitles = self._download_media_selector(programme_id)
1172             return {
1173                 'id': programme_id,
1174                 'title': title,
1175                 'formats': formats,
1176                 **traverse_obj(current_programme, {
1177                     'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
1178                     'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
1179                     'duration': ('duration', 'value', {int_or_none}),
1180                     'uploader': ('network', 'short_title', {str}),
1181                     'uploader_id': ('network', 'id', {str}),
1182                     'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
1183                     'series': ('titles', 'primary', {str}),
1184                 }),
1185                 'subtitles': subtitles,
1186                 'chapters': traverse_obj(preload_state, (
1187                     'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
1188                         'title': ('titles', {lambda x: join_nonempty(
1189                             'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
1190                         'start_time': ('offset', 'start', {float_or_none}),
1191                         'end_time': ('offset', 'end', {float_or_none}),
1192                     }),
1193                 ),
1194             }
1195
1196         # PWA_PRELOADED_STATE with article video asset
1197         asset_id = traverse_obj(preload_state, (
1198             'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
1199             'assetVideo', 0, {str}, any))
1200         if asset_id:
1201             video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
1202             if video_id:
1203                 article = traverse_obj(preload_state, (
1204                     'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
1205
1206                 def image_url(image_id):
1207                     return traverse_obj(preload_state, (
1208                         'entities', 'images', image_id, 'url',
1209                         {lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
1210
1211                 formats, subtitles = self._download_media_selector(video_id)
1212                 return {
1213                     'id': video_id,
1214                     **traverse_obj(preload_state, ('entities', 'videos', asset_id, {
1215                         'title': ('title', {str}),
1216                         'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
1217                         'thumbnail': (0, {image_url}),
1218                         'duration': ('duration', {int_or_none}),
1219                     })),
1220                     'formats': formats,
1221                     'subtitles': subtitles,
1222                     'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
1223                 }
1224             else:
1225                 return self.url_result(
1226                     f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
1227                     asset_id, playlist_title, display_id=playlist_id,
1228                     description=playlist_description)
1229
1230         bbc3_config = self._parse_json(
1231             self._search_regex(
1232                 r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
1233                 'bbcthree config', default='{}'),
1234             playlist_id, transform_source=js_to_json, fatal=False) or {}
1235         payload = bbc3_config.get('payload') or {}
1236         if payload:
1237             clip = payload.get('currentClip') or {}
1238             clip_vpid = clip.get('vpid')
1239             clip_title = clip.get('title')
1240             if clip_vpid and clip_title:
1241                 formats, subtitles = self._download_media_selector(clip_vpid)
1242                 return {
1243                     'id': clip_vpid,
1244                     'title': clip_title,
1245                     'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
1246                     'description': clip.get('description'),
1247                     'duration': parse_duration(clip.get('duration')),
1248                     'formats': formats,
1249                     'subtitles': subtitles,
1250                 }
1251             bbc3_playlist = try_get(
1252                 payload, lambda x: x['content']['bbcMedia']['playlist'],
1253                 dict)
1254             if bbc3_playlist:
1255                 playlist_title = bbc3_playlist.get('title') or playlist_title
1256                 thumbnail = bbc3_playlist.get('holdingImageURL')
1257                 entries = []
1258                 for bbc3_item in bbc3_playlist['items']:
1259                     programme_id = bbc3_item.get('versionID')
1260                     if not programme_id:
1261                         continue
1262                     formats, subtitles = self._download_media_selector(programme_id)
1263                     entries.append({
1264                         'id': programme_id,
1265                         'title': playlist_title,
1266                         'thumbnail': thumbnail,
1267                         'timestamp': timestamp,
1268                         'formats': formats,
1269                         'subtitles': subtitles,
1270                     })
1271                 return self.playlist_result(
1272                     entries, playlist_id, playlist_title, playlist_description)
1273
1274         def parse_model(model):
1275             """Extract single video from model structure"""
1276             item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
1277             if not item_id:
1278                 return
1279             formats, subtitles = self._download_media_selector(item_id)
1280             return {
1281                 'id': item_id,
1282                 'formats': formats,
1283                 'subtitles': subtitles,
1284                 **traverse_obj(model, {
1285                     'title': ('title', {str}),
1286                     'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
1287                     'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
1288                     'duration': ('versions', 0, 'duration', {int}),
1289                     'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
1290                 }),
1291             }
1292
1293         def is_type(*types):
1294             return lambda _, v: v['type'] in types
1295
1296         initial_data = self._search_regex(
1297             r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
1298             'quoted preload state', default=None)
1299         if initial_data is None:
1300             initial_data = self._search_regex(
1301                 r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
1302                 'preload state', default='{}')
1303         else:
1304             initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
1305         initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
1306         if initial_data:
1307             for video_data in traverse_obj(initial_data, (
1308                     'stores', 'article', 'articleBodyContent', is_type('video'))):
1309                 model = traverse_obj(video_data, (
1310                     'model', 'blocks', is_type('aresMedia'),
1311                     'model', 'blocks', is_type('aresMediaMetadata'),
1312                     'model', {dict}, any))
1313                 entry = parse_model(model)
1314                 if entry:
1315                     entries.append(entry)
1316             if entries:
1317                 return self.playlist_result(
1318                     entries, playlist_id, playlist_title, playlist_description)
1319
1320             def parse_media(media):
1321                 if not media:
1322                     return
1323                 for item in (try_get(media, lambda x: x['media']['items'], list) or []):
1324                     item_id = item.get('id')
1325                     item_title = item.get('title')
1326                     if not (item_id and item_title):
1327                         continue
1328                     formats, subtitles = self._download_media_selector(item_id)
1329                     item_desc = None
1330                     blocks = try_get(media, lambda x: x['summary']['blocks'], list)
1331                     if blocks:
1332                         summary = []
1333                         for block in blocks:
1334                             text = try_get(block, lambda x: x['model']['text'], str)
1335                             if text:
1336                                 summary.append(text)
1337                         if summary:
1338                             item_desc = '\n\n'.join(summary)
1339                     item_time = None
1340                     for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
1341                         if try_get(meta, lambda x: x['label']) == 'Published':
1342                             item_time = unified_timestamp(meta.get('timestamp'))
1343                             break
1344                     entries.append({
1345                         'id': item_id,
1346                         'title': item_title,
1347                         'thumbnail': item.get('holdingImageUrl'),
1348                         'formats': formats,
1349                         'subtitles': subtitles,
1350                         'timestamp': item_time,
1351                         'description': strip_or_none(item_desc),
1352                         'duration': int_or_none(item.get('duration')),
1353                     })
1354
1355             for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
1356                 name = resp['name']
1357                 if name == 'media-experience':
1358                     parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
1359                 elif name == 'article':
1360                     for block in traverse_obj(resp, (
1361                             'data', (None, ('content', 'model')), 'blocks',
1362                             is_type('media', 'video'), 'model', {dict})):
1363                         parse_media(block)
1364             return self.playlist_result(
1365                 entries, playlist_id, playlist_title, playlist_description)
1366
1367         # extract from SIMORGH_DATA hydration JSON
1368         simorgh_data = self._search_json(
1369             r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
1370             'simorgh data', playlist_id, default={})
1371         if simorgh_data:
1372             done = False
1373             for video_data in traverse_obj(simorgh_data, (
1374                     'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
1375                 model = traverse_obj(video_data, (
1376                     'model', 'blocks', is_type('aresMedia'),
1377                     'model', 'blocks', is_type('aresMediaMetadata'),
1378                     'model', {dict}, any))
1379                 if video_data['type'] == 'video':
1380                     entry = parse_model(model)
1381                 else:  # legacyMedia: no duration, subtitles
1382                     block_id, entry = traverse_obj(model, ('blockId', {str})), None
1383                     media_data = traverse_obj(simorgh_data, (
1384                         'pageData', 'promo', 'media',
1385                         {lambda x: x if x['id'] == block_id else None}))
1386                     formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
1387                         'url': ('url', {url_or_none}),
1388                         'ext': ('format', {str}),
1389                         'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
1390                     }))
1391                     if formats:
1392                         entry = {
1393                             'id': block_id,
1394                             'display_id': playlist_id,
1395                             'formats': formats,
1396                             'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
1397                             **traverse_obj(model, {
1398                                 'title': ('title', {str}),
1399                                 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
1400                                 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
1401                                 'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
1402                             }),
1403                         }
1404                         done = True
1405                 if entry:
1406                     entries.append(entry)
1407                 if done:
1408                     break
1409             if entries:
1410                 return self.playlist_result(
1411                     entries, playlist_id, playlist_title, playlist_description)
1412
1413         def extract_all(pattern):
1414             return list(filter(None, (
1415                 self._parse_json(s, playlist_id, fatal=False)
1416                 for s in re.findall(pattern, webpage))))
1417
1418         # US accessed article with single embedded video (e.g.
1419         # https://www.bbc.com/news/uk-68546268)
1420         next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
1421                                  ('props', 'pageProps', 'page'))
1422         model = traverse_obj(next_data, (
1423             ..., 'contents', is_type('video'),
1424             'model', 'blocks', is_type('media'),
1425             'model', 'blocks', is_type('mediaMetadata'),
1426             'model', {dict}, any))
1427         if model and (entry := parse_model(model)):
1428             if not entry.get('timestamp'):
1429                 entry['timestamp'] = traverse_obj(next_data, (
1430                     ..., 'contents', is_type('timestamp'), 'model',
1431                     'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
1432             entries.append(entry)
1433             return self.playlist_result(
1434                 entries, playlist_id, playlist_title, playlist_description)
1435
1436         # Multiple video article (e.g.
1437         # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
1438         EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?'
1439         entries = []
1440         for match in extract_all(r'new\s+SMP\(({.+?})\)'):
1441             embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
1442             if embed_url and re.match(EMBED_URL, embed_url):
1443                 entries.append(embed_url)
1444         entries.extend(re.findall(
1445             rf'setPlaylist\("({EMBED_URL})"\)', webpage))
1446         if entries:
1447             return self.playlist_result(
1448                 [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
1449                 playlist_id, playlist_title, playlist_description)
1450
1451         # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
1452         medias = extract_all(r"data-media-meta='({[^']+})'")
1453
1454         if not medias:
1455             # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
1456             media_asset = self._search_regex(
1457                 r'mediaAssetPage\.init\(\s*({.+?}), "/',
1458                 webpage, 'media asset', default=None)
1459             if media_asset:
1460                 media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
1461                 medias = []
1462                 for video in media_asset_page.get('videos', {}).values():
1463                     medias.extend(video.values())
1464
1465         if not medias:
1466             # Multiple video playlist with single `now playing` entry (e.g.
1467             # http://www.bbc.com/news/video_and_audio/must_see/33767813)
1468             vxp_playlist = self._parse_json(
1469                 self._search_regex(
1470                     r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
1471                     webpage, 'playlist data'),
1472                 playlist_id)
1473             playlist_medias = []
1474             for item in vxp_playlist:
1475                 media = item.get('media')
1476                 if not media:
1477                     continue
1478                 playlist_medias.append(media)
1479                 # Download single video if found media with asset id matching the video id from URL
1480                 if item.get('advert', {}).get('assetId') == playlist_id:
1481                     medias = [media]
1482                     break
1483             # Fallback to the whole playlist
1484             if not medias:
1485                 medias = playlist_medias
1486
1487         entries = []
1488         for num, media_meta in enumerate(medias, start=1):
1489             formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
1490             if not formats and not self.get_param('ignore_no_formats'):
1491                 continue
1492
1493             video_id = media_meta.get('externalId')
1494             if not video_id:
1495                 video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}'
1496
1497             title = media_meta.get('caption')
1498             if not title:
1499                 title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}'
1500
1501             duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
1502
1503             images = []
1504             for image in media_meta.get('images', {}).values():
1505                 images.extend(image.values())
1506             if 'image' in media_meta:
1507                 images.append(media_meta['image'])
1508
1509             thumbnails = [{
1510                 'url': image.get('href'),
1511                 'width': int_or_none(image.get('width')),
1512                 'height': int_or_none(image.get('height')),
1513             } for image in images]
1514
1515             entries.append({
1516                 'id': video_id,
1517                 'title': title,
1518                 'thumbnails': thumbnails,
1519                 'duration': duration,
1520                 'timestamp': timestamp,
1521                 'formats': formats,
1522                 'subtitles': subtitles,
1523             })
1524
1525         return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
1526
1527
class BBCCoUkArticleIE(InfoExtractor):
    # Programme article pages: the result is a playlist with one URL entry
    # per clip element found in the page markup.
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
    IE_NAME = 'bbc.co.uk:article'
    IE_DESC = 'BBC articles'

    _TEST = {
        'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
        'info_dict': {
            'id': '3jNQLTMrPlYGTBn0WV6M2MS',
            'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
            'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
        },
        'playlist_count': 4,
        'add_ie': ['BBCCoUk'],
    }

    def _real_extract(self, url):
        """Return a playlist of the clips referenced by the article at `url`"""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        # The OpenGraph description lookup is non-fatal and yields None when the
        # tag is absent, so it must not be stripped unconditionally
        description = strip_or_none(self._og_search_description(webpage))

        # Each clip is marked up as <div typeof="Clip" resource="<programme URL>">
        entries = [self.url_result(programme_url) for programme_url in re.findall(
            r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]

        return self.playlist_result(entries, playlist_id, title, description)
1556
1557
class BBCCoUkPlaylistBaseIE(InfoExtractor):
    # Base for paginated listing pages. Subclasses are expected to provide
    # _VIDEO_ID_TEMPLATE, _URL_TEMPLATE and _extract_title_and_description().

    def _entries(self, webpage, url, playlist_id):
        """Yield a BBCCoUkIE URL result for every video ID found in the listing

        Starts from the already downloaded `webpage` and follows the "next"
        pagination link page by page. When `url` has an explicit `page` query
        parameter only that first page is scanned.
        """
        single_page = 'page' in parse_qs(url)
        video_id_re = self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX
        # The first page is already downloaded, so numbering of fetched pages starts at 2
        for page_num in itertools.count(2):
            for video_id in re.findall(video_id_re, webpage):
                yield self.url_result(
                    self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
            if single_page:
                return
            next_page = self._search_regex(
                r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
                webpage, 'next page url', default=None, group='url')
            if not next_page:
                break
            # Pass the notes by keyword: the fourth positional parameter is
            # `errnote`, which must be a message rather than the page number
            webpage = self._download_webpage(
                urllib.parse.urljoin(url, next_page), playlist_id,
                note=f'Downloading page {page_num}',
                errnote=f'Unable to download page {page_num}')

    def _real_extract(self, url):
        """Return the listing at `url` as a lazily evaluated playlist"""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        title, description = self._extract_title_and_description(webpage)

        return self.playlist_result(
            self._entries(webpage, url, playlist_id),
            playlist_id, title, description)
1588
1589
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
    # Shared paging logic for iPlayer listings. Subclasses are expected to
    # provide _PAGE_SIZE, _DESCRIPTION_KEY, _call_api() and the _get_*()
    # accessors used below.
    _VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})'

    @staticmethod
    def _get_default(episode, key, default_key='default'):
        """Return episode[key][default_key], or None when that lookup fails"""
        return try_get(episode, lambda ep: ep[key][default_key])

    def _get_description(self, data):
        """Return the first non-empty synopsis among large, medium and small"""
        synopses = data.get(self._DESCRIPTION_KEY) or {}
        return dict_get(synopses, ('large', 'medium', 'small'))

    def _fetch_page(self, programme_id, per_page, series_id, page):
        """Yield url-type entries for the episodes of one 0-based `page`"""
        # The API is called with a page number one higher than the 0-based `page`
        api_data = self._call_api(programme_id, per_page, page + 1, series_id)
        for element in self._get_elements(api_data):
            episode = self._get_episode(element)
            episode_id = episode.get('id')
            if not episode_id:
                continue
            image = self._get_episode_image(episode)
            category = self._get_default(episode, 'labels', 'category')
            yield {
                '_type': 'url',
                'id': episode_id,
                'title': self._get_episode_field(episode, 'subtitle'),
                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
                # The image URL carries a `{recipe}` placeholder that is swapped for `raw`
                'thumbnail': image.replace('{recipe}', 'raw') if image else None,
                'description': self._get_description(episode),
                'categories': [category] if category else None,
                'series': self._get_episode_field(episode, 'title'),
                'ie_key': BBCCoUkIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the listing as a playlist, limited to one page if `page` is in the URL"""
        pid = self._match_id(url)
        query = parse_qs(url)
        series_id = query.get('seriesId', [None])[0]
        page = query.get('page', [None])[0]
        if page:
            # An explicitly requested page is fetched with a page size of 36
            entries = self._fetch_page(pid, 36, series_id, int(page) - 1)
        else:
            entries = OnDemandPagedList(
                functools.partial(self._fetch_page, pid, self._PAGE_SIZE, series_id),
                self._PAGE_SIZE)
        # A separate one-item request supplies the playlist level metadata
        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
        return self.playlist_result(
            entries, pid, self._get_playlist_title(playlist_data),
            self._get_description(playlist_data))
1638
1639
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "episodes" listings, fetched through the graph.ibl.api.bbc.co.uk endpoint
    IE_NAME = 'bbc.co.uk:iplayer:episodes'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance',
            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
        },
        'playlist_mincount': 8,
    }, {
        # all seasons
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 10,
    }, {
        # explicit season
        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
        'info_dict': {
            'id': 'b094m5t9',
            'title': 'Doctor Foster',
            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
        },
        'playlist_mincount': 5,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 37,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
        'info_dict': {
            'id': 'm0004c4v',
            'title': 'Beechgrove',
            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
        },
        'playlist_mincount': 1,
    }]
    _PAGE_SIZE = 100
    _DESCRIPTION_KEY = 'synopsis'

    def _get_episode_image(self, episode):
        """Return the `default` variant of the episode's `image` mapping"""
        return self._get_default(episode, 'image')

    def _get_episode_field(self, episode, field):
        """Return the `default` variant of the episode's `field` mapping"""
        return self._get_default(episode, field)

    @staticmethod
    def _get_elements(data):
        """Return the result list found under data['entities']"""
        entities = data['entities']
        return entities['results']

    @staticmethod
    def _get_episode(element):
        """Return the element's `episode` mapping, or an empty dict if it has none"""
        return element.get('episode') or {}

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """POST the paging variables as JSON and return the response's data.programme object"""
        variables = {
            'id': pid,
            'page': page,
            'perPage': per_page,
            # `sliceId` is only sent when a series was requested
            **({'sliceId': series_id} if series_id else {}),
        }
        payload = json.dumps({
            'id': '5692d93d5aac8d796a0305e895e61551',
            'variables': variables,
        }).encode()
        response = self._download_json(
            'https://graph.ibl.api.bbc.co.uk/', pid,
            headers={'Content-Type': 'application/json'}, data=payload)
        return response['data']['programme']

    @staticmethod
    def _get_playlist_data(data):
        """Return `data` unchanged; the programme object doubles as playlist metadata"""
        return data

    def _get_playlist_title(self, data):
        """Return the `default` variant of the playlist's `title` mapping"""
        return self._get_default(data, 'title')
1727
1728
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
    # iPlayer "group" listings, fetched through the ibl v1 groups endpoint
    IE_NAME = 'bbc.co.uk:iplayer:group'
    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
    _TESTS = [{
        # Available for over a year unlike 30 days for most other programmes
        'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
        'info_dict': {
            'id': 'p02tcc32',
            'title': 'Bohemian Icons',
            'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
        },
        'playlist_mincount': 10,
    }, {
        # all pages
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 47,
    }, {
        # explicit page
        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
        'info_dict': {
            'id': 'p081d7j7',
            'title': 'Music in Scotland',
            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
        },
        'playlist_mincount': 11,
    }]
    _PAGE_SIZE = 200
    _DESCRIPTION_KEY = 'synopses'

    def _get_episode_image(self, episode):
        """Return the `standard` variant of the episode's `images` mapping"""
        return self._get_default(episode, 'images', 'standard')

    def _get_episode_field(self, episode, field):
        """Return the episode's `field` value as stored (None when absent)"""
        return episode.get(field)

    @staticmethod
    def _get_elements(data):
        """Return the list stored under data['elements']"""
        return data['elements']

    @staticmethod
    def _get_episode(element):
        """Return `element` unchanged; here each element already is the episode"""
        return element

    def _call_api(self, pid, per_page, page=1, series_id=None):
        """Fetch one page of the group's episodes and return its group_episodes object"""
        # `series_id` is part of the shared signature but is not sent to this endpoint
        response = self._download_json(
            f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes', pid,
            query={
                'page': page,
                'per_page': per_page,
            })
        return response['group_episodes']

    @staticmethod
    def _get_playlist_data(data):
        """Return the `group` object that holds the playlist metadata"""
        return data['group']

    def _get_playlist_title(self, data):
        """Return the playlist's `title` value (None when absent)"""
        return data.get('title')
1791
1792
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    # Episode, broadcast and clip listings of a bbc.co.uk programme
    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)'
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        # multipage playlist, explicit page
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 24,
    }, {
        # multipage playlist, all pages
        'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
        'info_dict': {
            'id': 'b00mfl7n',
            'title': 'Frozen Planet - Clips - BBC One',
            'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
        },
        'playlist_mincount': 142,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return the page's OpenGraph (title, description) pair; the title lookup is non-fatal"""
        return (
            self._og_search_title(webpage, fatal=False),
            self._og_search_description(webpage),
        )