[ie/TrtWorld] Add extractor (#8701)

[yt-dlp.git] / yt_dlp / extractor / bbc.py
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py

index 92e6f1bea37da50f2d4a838dc4ac0596663bb537..015af9e1d616c9b71a1ae0eb6f3b15490b6218fd 100644 (file)
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -1,27 +1,30 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import functools
  import itertools
+import json
  import re
+import xml.etree.ElementTree
  
  from .common import InfoExtractor
-from ..compat import (
-    compat_etree_Element,
-    compat_HTTPError,
-    compat_urlparse,
-)
+from ..compat import compat_str, compat_urlparse
+from ..networking.exceptions import HTTPError
  from ..utils import (
      ExtractorError,
+    OnDemandPagedList,
      clean_html,
      dict_get,
      float_or_none,
      get_element_by_class,
      int_or_none,
+    join_nonempty,
      js_to_json,
      parse_duration,
      parse_iso8601,
+    parse_qs,
+    strip_or_none,
+    traverse_obj,
      try_get,
      unescapeHTML,
+    unified_timestamp,
      url_or_none,
      urlencode_postdata,
      urljoin,
@@ -31,7 +34,7 @@
  class BBCCoUkIE(InfoExtractor):
      IE_NAME = 'bbc.co.uk'
      IE_DESC = 'BBC iPlayer'
-    _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
+    _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})'
      _VALID_URL = r'''(?x)
                      https?://
                          (?:www\.)?bbc\.co\.uk/
@@ -40,11 +43,11 @@ class BBCCoUkIE(InfoExtractor):
                              iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
                              music/(?:clips|audiovideo/popular)[/#]|
                              radio/player/|
-                            sounds/play/|
                              events/[^/]+/play/[^/]+/
                          )
                          (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
                      ''' % _ID_REGEX
+    _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)']
  
      _LOGIN_URL = 'https://account.bbc.com/signin'
      _NETRC_MACHINE = 'bbc'
@@ -216,20 +219,6 @@ class BBCCoUkIE(InfoExtractor):
                  # rtmp download
                  'skip_download': True,
              },
-        }, {
-            'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
-            'note': 'Audio',
-            'info_dict': {
-                'id': 'm0007jz9',
-                'ext': 'mp4',
-                'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
-                'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
-                'duration': 9840,
-            },
-            'params': {
-                # rtmp download
-                'skip_download': True,
-            }
          }, {
              'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
              'only_matching': True,
@@ -256,11 +245,7 @@ class BBCCoUkIE(InfoExtractor):
              'only_matching': True,
          }]
  
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
-
+    def _perform_login(self, username, password):
          login_page = self._download_webpage(
              self._LOGIN_URL, None, 'Downloading signin page')
  
@@ -279,16 +264,13 @@ def _login(self):
              post_url, None, 'Logging in', data=urlencode_postdata(login_form),
              headers={'Referer': self._LOGIN_URL})
  
-        if self._LOGIN_URL in urlh.geturl():
+        if self._LOGIN_URL in urlh.url:
              error = clean_html(get_element_by_class('form-message', response))
              if error:
                  raise ExtractorError(
                      'Unable to login: %s' % error, expected=True)
              raise ExtractorError('Unable to log in')
  
-    def _real_initialize(self):
-        self._login()
-
      class MediaSelectionError(Exception):
          def __init__(self, id):
              self.id = id
@@ -317,7 +299,7 @@ def _get_subtitles(self, media, programme_id):
                  continue
              captions = self._download_xml(
                  cc_url, programme_id, 'Downloading captions', fatal=False)
-            if not isinstance(captions, compat_etree_Element):
+            if not isinstance(captions, xml.etree.ElementTree.Element):
                  continue
              subtitles['en'] = [
                  {
@@ -335,16 +317,25 @@ def _raise_extractor_error(self, media_selection_error):
  
      def _download_media_selector(self, programme_id):
          last_exception = None
+        formats, subtitles = [], {}
          for media_set in self._MEDIA_SETS:
              try:
-                return self._download_media_selector_url(
+                fmts, subs = self._download_media_selector_url(
                      self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
+                formats.extend(fmts)
+                if subs:
+                    self._merge_subtitles(subs, target=subtitles)
              except BBCCoUkIE.MediaSelectionError as e:
                  if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
                      last_exception = e
                      continue
                  self._raise_extractor_error(e)
-        self._raise_extractor_error(last_exception)
+        if last_exception:
+            if formats or subtitles:
+                self.report_warning(f'{self.IE_NAME} returned error: {last_exception.id}')
+            else:
+                self._raise_extractor_error(last_exception)
+        return formats, subtitles
  
      def _download_media_selector_url(self, url, programme_id=None):
          media_selection = self._download_json(
@@ -387,9 +378,17 @@ def _process_media_selector(self, media_selection, programme_id):
                          formats.extend(self._extract_mpd_formats(
                              href, programme_id, mpd_id=format_id, fatal=False))
                      elif transfer_format == 'hls':
-                        formats.extend(self._extract_m3u8_formats(
-                            href, programme_id, ext='mp4', entry_protocol='m3u8_native',
-                            m3u8_id=format_id, fatal=False))
+                        # TODO: let expected_status be passed into _extract_xxx_formats() instead
+                        try:
+                            fmts = self._extract_m3u8_formats(
+                                href, programme_id, ext='mp4', entry_protocol='m3u8_native',
+                                m3u8_id=format_id, fatal=False)
+                        except ExtractorError as e:
+                            if not (isinstance(e.exc_info[1], HTTPError)
+                                    and e.exc_info[1].status in (403, 404)):
+                                raise
+                            fmts = []
+                        formats.extend(fmts)
                      elif transfer_format == 'hds':
                          formats.extend(self._extract_f4m_formats(
                              href, programme_id, f4m_id=format_id, fatal=False))
@@ -444,9 +443,10 @@ def _download_playlist(self, playlist_id):
              playlist = self._download_json(
                  'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
                  playlist_id, 'Downloading playlist JSON')
+            formats = []
+            subtitles = {}
  
-            version = playlist.get('defaultAvailableVersion')
-            if version:
+            for version in playlist.get('allAvailableVersions', []):
                  smp_config = version['smpConfig']
                  title = smp_config['title']
                  description = smp_config['summary']
@@ -456,10 +456,19 @@ def _download_playlist(self, playlist_id):
                          continue
                      programme_id = item.get('vpid')
                      duration = int_or_none(item.get('duration'))
-                    formats, subtitles = self._download_media_selector(programme_id)
-                return programme_id, title, description, duration, formats, subtitles
+                    version_formats, version_subtitles = self._download_media_selector(programme_id)
+                    types = version['types']
+                    for f in version_formats:
+                        f['format_note'] = ', '.join(types)
+                        if any('AudioDescribed' in x for x in types):
+                            f['language_preference'] = -10
+                    formats += version_formats
+                    for tag, subformats in (version_subtitles or {}).items():
+                        subtitles.setdefault(tag, []).extend(subformats)
+
+            return programme_id, title, description, duration, formats, subtitles
          except ExtractorError as ee:
-            if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
+            if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
                  raise
  
          # fallback to legacy playlist
@@ -562,8 +571,6 @@ def _real_extract(self, url):
          else:
              programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
  
-        self._sort_formats(formats)
-
          return {
              'id': programme_id,
              'title': title,
@@ -575,14 +582,19 @@ def _real_extract(self, url):
          }
  
  
-class BBCIE(BBCCoUkIE):
+class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
      IE_NAME = 'bbc'
      IE_DESC = 'BBC'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
+    _VALID_URL = r'''(?x)
+        https?://(?:www\.)?(?:
+            bbc\.(?:com|co\.uk)|
+            bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd\.onion|
+            bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad\.onion
+        )/(?:[^/]+/)+(?P<id>[^/#?]+)'''
  
      _MEDIA_SETS = [
-        'mobile-tablet-main',
          'pc',
+        'mobile-tablet-main',
      ]
  
      _TESTS = [{
@@ -756,23 +768,44 @@ class BBCIE(BBCCoUkIE):
          'only_matching': True,
      }, {
          # custom redirection to www.bbc.com
+        # also, video with window.__INITIAL_DATA__
          'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
-        'only_matching': True,
+        'info_dict': {
+            'id': 'p02xzws1',
+            'ext': 'mp4',
+            'title': "Pluto may have 'nitrogen glaciers'",
+            'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'timestamp': 1437785037,
+            'upload_date': '20150725',
+        },
+    }, {
+        # video with window.__INITIAL_DATA__ and value as JSON string
+        'url': 'https://www.bbc.com/news/av/world-europe-59468682',
+        'info_dict': {
+            'id': 'p0b71qth',
+            'ext': 'mp4',
+            'title': 'Why France is making this woman a national hero',
+            'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'timestamp': 1638230731,
+            'upload_date': '20211130',
+        },
      }, {
          # single video article embedded with data-media-vpid
          'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
          'only_matching': True,
      }, {
+        # bbcthreeConfig
          'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
          'info_dict': {
              'id': 'p06556y7',
              'ext': 'mp4',
-            'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
-            'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
+            'title': 'Things Not To Say to people that live on council estates',
+            'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
+            'duration': 360,
+            'thumbnail': r're:https?://.+/.+\.jpg',
          },
-        'params': {
-            'skip_download': True,
-        }
      }, {
          # window.__PRELOADED_STATE__
          'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
@@ -807,11 +840,31 @@ class BBCIE(BBCCoUkIE):
              'upload_date': '20190604',
              'categories': ['Psychology'],
          },
+    }, {
+        # BBC Sounds
+        'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+        'info_dict': {
+            'id': 'm001q789',
+            'ext': 'mp4',
+            'title': 'The Night Tracks Mix - Music for the darkling hour',
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
+            'chapters': 'count:8',
+            'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
+            'uploader': 'Radio 3',
+            'duration': 1800,
+            'uploader_id': 'bbc_radio_three',
+        },
+    }, {  # onion routes
+        'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.bbcweb3hytmzhn5d532owbu6oqadra5z3ar726vq5kgwwn6aucdccrad.onion/sport/av/football/63195681',
+        'only_matching': True,
      }]
  
      @classmethod
      def suitable(cls, url):
-        EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
+        EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
          return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
                  else super(BBCIE, cls).suitable(url))
  
@@ -845,7 +898,6 @@ def _extract_from_media_meta(self, media_meta, video_id):
      def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
          programme_id, title, description, duration, formats, subtitles = \
              self._process_legacy_playlist_url(url, playlist_id)
-        self._sort_formats(formats)
          return {
              'id': programme_id,
              'title': title,
@@ -864,13 +916,8 @@ def _real_extract(self, url):
          json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
          timestamp = json_ld_info.get('timestamp')
  
-        playlist_title = json_ld_info.get('title')
-        if not playlist_title:
-            playlist_title = self._og_search_title(
-                webpage, default=None) or self._html_search_regex(
-                r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
-            if playlist_title:
-                playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+        playlist_title = json_ld_info.get('title') or re.sub(
+            r'(.+)\s*-\s*BBC.*?$', r'\1', self._generic_title('', webpage, default='')).strip() or None
  
          playlist_description = json_ld_info.get(
              'description') or self._og_search_description(webpage, default=None)
@@ -914,7 +961,6 @@ def _real_extract(self, url):
                              duration = int_or_none(items[0].get('duration'))
                              programme_id = items[0].get('vpid')
                              formats, subtitles = self._download_media_selector(programme_id)
-                            self._sort_formats(formats)
                              entries.append({
                                  'id': programme_id,
                                  'title': title,
@@ -947,11 +993,10 @@ def _real_extract(self, url):
                                      # Some playlist URL may fail with 500, at the same time
                                      # the other one may work fine (e.g.
                                      # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
-                                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
+                                    if isinstance(e.cause, HTTPError) and e.cause.status == 500:
                                          continue
                                      raise
                              if entry:
-                                self._sort_formats(entry['formats'])
                                  entries.append(entry)
  
          if entries:
@@ -975,7 +1020,6 @@ def _real_extract(self, url):
  
          if programme_id:
              formats, subtitles = self._download_media_selector(programme_id)
-            self._sort_formats(formats)
              # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
              digital_data = self._parse_json(
                  self._search_regex(
@@ -1007,7 +1051,6 @@ def _real_extract(self, url):
              if version_id:
                  title = smp_data['title']
                  formats, subtitles = self._download_media_selector(version_id)
-                self._sort_formats(formats)
                  image_url = smp_data.get('holdingImageURL')
                  display_date = init_data.get('displayDate')
                  topic_title = init_data.get('topicTitle')
@@ -1049,7 +1092,6 @@ def _real_extract(self, url):
                      continue
                  title = lead_media.get('title') or self._og_search_title(webpage)
                  formats, subtitles = self._download_media_selector(programme_id)
-                self._sort_formats(formats)
                  description = lead_media.get('summary')
                  uploader = lead_media.get('masterBrand')
                  uploader_id = lead_media.get('mid')
@@ -1078,7 +1120,6 @@ def _real_extract(self, url):
              if current_programme and programme_id and current_programme.get('type') == 'playable_item':
                  title = current_programme.get('titles', {}).get('tertiary') or playlist_title
                  formats, subtitles = self._download_media_selector(programme_id)
-                self._sort_formats(formats)
                  synopses = current_programme.get('synopses') or {}
                  network = current_programme.get('network') or {}
                  duration = int_or_none(
@@ -1097,6 +1138,13 @@ def _real_extract(self, url):
                      'uploader_id': network.get('id'),
                      'formats': formats,
                      'subtitles': subtitles,
+                    'chapters': traverse_obj(preload_state, (
+                        'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
+                            'title': ('titles', {lambda x: join_nonempty(
+                                'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+                            'start_time': ('offset', 'start', {float_or_none}),
+                            'end_time': ('offset', 'end', {float_or_none}),
+                        })) or None,
                  }
  
          bbc3_config = self._parse_json(
@@ -1111,7 +1159,6 @@ def _real_extract(self, url):
              clip_title = clip.get('title')
              if clip_vpid and clip_title:
                  formats, subtitles = self._download_media_selector(clip_vpid)
-                self._sort_formats(formats)
                  return {
                      'id': clip_vpid,
                      'title': clip_title,
@@ -1133,7 +1180,6 @@ def _real_extract(self, url):
                      if not programme_id:
                          continue
                      formats, subtitles = self._download_media_selector(programme_id)
-                    self._sort_formats(formats)
                      entries.append({
                          'id': programme_id,
                          'title': playlist_title,
@@ -1145,9 +1191,16 @@ def _real_extract(self, url):
                  return self.playlist_result(
                      entries, playlist_id, playlist_title, playlist_description)
  
-        initial_data = self._parse_json(self._search_regex(
-            r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
-            'preload state', default='{}'), playlist_id, fatal=False)
+        initial_data = self._search_regex(
+            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
+            'quoted preload state', default=None)
+        if initial_data is None:
+            initial_data = self._search_regex(
+                r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
+                'preload state', default='{}')
+        else:
+            initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
+        initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
          if initial_data:
              def parse_media(media):
                  if not media:
@@ -1158,21 +1211,40 @@ def parse_media(media):
                      if not (item_id and item_title):
                          continue
                      formats, subtitles = self._download_media_selector(item_id)
-                    self._sort_formats(formats)
+                    item_desc = None
+                    blocks = try_get(media, lambda x: x['summary']['blocks'], list)
+                    if blocks:
+                        summary = []
+                        for block in blocks:
+                            text = try_get(block, lambda x: x['model']['text'], compat_str)
+                            if text:
+                                summary.append(text)
+                        if summary:
+                            item_desc = '\n\n'.join(summary)
+                    item_time = None
+                    for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
+                        if try_get(meta, lambda x: x['label']) == 'Published':
+                            item_time = unified_timestamp(meta.get('timestamp'))
+                            break
                      entries.append({
                          'id': item_id,
                          'title': item_title,
                          'thumbnail': item.get('holdingImageUrl'),
                          'formats': formats,
                          'subtitles': subtitles,
+                        'timestamp': item_time,
+                        'description': strip_or_none(item_desc),
                      })
              for resp in (initial_data.get('data') or {}).values():
                  name = resp.get('name')
                  if name == 'media-experience':
                      parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                  elif name == 'article':
-                    for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
-                        if block.get('type') != 'media':
+                    for block in (try_get(resp,
+                                          (lambda x: x['data']['blocks'],
+                                           lambda x: x['data']['content']['model']['blocks'],),
+                                          list) or []):
+                        if block.get('type') not in ['media', 'video']:
                              continue
                          parse_media(block.get('model'))
              return self.playlist_result(
@@ -1237,9 +1309,8 @@ def extract_all(pattern):
          entries = []
          for num, media_meta in enumerate(medias, start=1):
              formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
-            if not formats:
+            if not formats and not self.get_param('ignore_no_formats'):
                  continue
-            self._sort_formats(formats)
  
              video_id = media_meta.get('externalId')
              if not video_id:
@@ -1338,21 +1409,149 @@ def _real_extract(self, url):
              playlist_id, title, description)
  
  
-class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
-    IE_NAME = 'bbc.co.uk:iplayer:playlist'
-    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
-    _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
-    _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
+class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
+    _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+
+    @staticmethod
+    def _get_default(episode, key, default_key='default'):
+        return try_get(episode, lambda x: x[key][default_key])
+
+    def _get_description(self, data):
+        synopsis = data.get(self._DESCRIPTION_KEY) or {}
+        return dict_get(synopsis, ('large', 'medium', 'small'))
+
+    def _fetch_page(self, programme_id, per_page, series_id, page):
+        elements = self._get_elements(self._call_api(
+            programme_id, per_page, page + 1, series_id))
+        for element in elements:
+            episode = self._get_episode(element)
+            episode_id = episode.get('id')
+            if not episode_id:
+                continue
+            thumbnail = None
+            image = self._get_episode_image(episode)
+            if image:
+                thumbnail = image.replace('{recipe}', 'raw')
+            category = self._get_default(episode, 'labels', 'category')
+            yield {
+                '_type': 'url',
+                'id': episode_id,
+                'title': self._get_episode_field(episode, 'subtitle'),
+                'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
+                'thumbnail': thumbnail,
+                'description': self._get_description(episode),
+                'categories': [category] if category else None,
+                'series': self._get_episode_field(episode, 'title'),
+                'ie_key': BBCCoUkIE.ie_key(),
+            }
+
+    def _real_extract(self, url):
+        pid = self._match_id(url)
+        qs = parse_qs(url)
+        series_id = qs.get('seriesId', [None])[0]
+        page = qs.get('page', [None])[0]
+        per_page = 36 if page else self._PAGE_SIZE
+        fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
+        entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
+        playlist_data = self._get_playlist_data(self._call_api(pid, 1))
+        return self.playlist_result(
+            entries, pid, self._get_playlist_title(playlist_data),
+            self._get_description(playlist_data))
+
+
+class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
+    IE_NAME = 'bbc.co.uk:iplayer:episodes'
+    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
      _TESTS = [{
          'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
          'info_dict': {
              'id': 'b05rcz9v',
              'title': 'The Disappearance',
-            'description': 'French thriller serial about a missing teenager.',
+            'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
+        },
+        'playlist_mincount': 8,
+    }, {
+        # all seasons
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
+        'info_dict': {
+            'id': 'b094m5t9',
+            'title': 'Doctor Foster',
+            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
+        },
+        'playlist_mincount': 10,
+    }, {
+        # explicit season
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
+        'info_dict': {
+            'id': 'b094m5t9',
+            'title': 'Doctor Foster',
+            'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
          },
-        'playlist_mincount': 6,
-        'skip': 'This programme is not currently available on BBC iPlayer',
+        'playlist_mincount': 5,
      }, {
+        # all pages
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
+        'info_dict': {
+            'id': 'm0004c4v',
+            'title': 'Beechgrove',
+            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+        },
+        'playlist_mincount': 37,
+    }, {
+        # explicit page
+        'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
+        'info_dict': {
+            'id': 'm0004c4v',
+            'title': 'Beechgrove',
+            'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
+        },
+        'playlist_mincount': 1,
+    }]
+    _PAGE_SIZE = 100
+    _DESCRIPTION_KEY = 'synopsis'
+
+    def _get_episode_image(self, episode):
+        return self._get_default(episode, 'image')
+
+    def _get_episode_field(self, episode, field):
+        return self._get_default(episode, field)
+
+    @staticmethod
+    def _get_elements(data):
+        return data['entities']['results']
+
+    @staticmethod
+    def _get_episode(element):
+        return element.get('episode') or {}
+
+    def _call_api(self, pid, per_page, page=1, series_id=None):
+        variables = {
+            'id': pid,
+            'page': page,
+            'perPage': per_page,
+        }
+        if series_id:
+            variables['sliceId'] = series_id
+        return self._download_json(
+            'https://graph.ibl.api.bbc.co.uk/', pid, headers={
+                'Content-Type': 'application/json'
+            }, data=json.dumps({
+                'id': '5692d93d5aac8d796a0305e895e61551',
+                'variables': variables,
+            }).encode('utf-8'))['data']['programme']
+
+    @staticmethod
+    def _get_playlist_data(data):
+        return data
+
+    def _get_playlist_title(self, data):
+        return self._get_default(data, 'title')
+
+
+class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
+    IE_NAME = 'bbc.co.uk:iplayer:group'
+    _VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
+    _TESTS = [{
          # Available for over a year unlike 30 days for most other programmes
          'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
          'info_dict': {
@@ -1361,14 +1560,56 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
              'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
          },
          'playlist_mincount': 10,
+    }, {
+        # all pages
+        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
+        'info_dict': {
+            'id': 'p081d7j7',
+            'title': 'Music in Scotland',
+            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+        },
+        'playlist_mincount': 47,
+    }, {
+        # explicit page
+        'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
+        'info_dict': {
+            'id': 'p081d7j7',
+            'title': 'Music in Scotland',
+            'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
+        },
+        'playlist_mincount': 11,
      }]
+    _PAGE_SIZE = 200
+    _DESCRIPTION_KEY = 'synopses'
  
-    def _extract_title_and_description(self, webpage):
-        title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
-        description = self._search_regex(
-            r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
-            webpage, 'description', fatal=False, group='value')
-        return title, description
+    def _get_episode_image(self, episode):
+        return self._get_default(episode, 'images', 'standard')
+
+    def _get_episode_field(self, episode, field):
+        return episode.get(field)
+
+    @staticmethod
+    def _get_elements(data):
+        return data['elements']
+
+    @staticmethod
+    def _get_episode(element):
+        return element
+
+    def _call_api(self, pid, per_page, page=1, series_id=None):
+        return self._download_json(
+            'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
+            pid, query={
+                'page': page,
+                'per_page': per_page,
+            })['group_episodes']
+
+    @staticmethod
+    def _get_playlist_data(data):
+        return data['group']
+
+    def _get_playlist_title(self, data):
+        return data.get('title')
  
  
  class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):