[ie/crunchyroll] Fix stream extraction (#10005)

[yt-dlp.git] / yt_dlp / extractor / tenplay.py
diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py

index 633032e310c5a55698f9ae5004ebcfaa96980a8d..11cc5705e9539294d40f74e8de81bf5cc926abc3 100644 (file)
--- a/yt_dlp/extractor/tenplay.py
+++ b/yt_dlp/extractor/tenplay.py
@@ -1,12 +1,11 @@
-from datetime import datetime
  import base64
+import datetime as dt
+import functools
+import itertools
  
  from .common import InfoExtractor
-from ..utils import (
-    HEADRequest,
-    int_or_none,
-    urlencode_postdata,
-)
+from ..networking import HEADRequest
+from ..utils import int_or_none, traverse_obj, urlencode_postdata, urljoin
  
  
  class TenPlayIE(InfoExtractor):
@@ -21,7 +20,8 @@ class TenPlayIE(InfoExtractor):
              'alt_title': 'Nathan Borg Is The First Aussie Actor With A Cochlear Implant To Join Neighbours',
              'description': 'md5:a02d0199c901c2dd4c796f1e7dd0de43',
              'duration': 186,
-            'season': 39,
+            'season': 'Season 39',
+            'season_number': 39,
              'series': 'Neighbours',
              'thumbnail': r're:https://.*\.jpg',
              'uploader': 'Channel 10',
@@ -70,7 +70,7 @@ def _get_bearer_token(self, video_id):
          username, password = self._get_login_info()
          if username is None or password is None:
              self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
-        _timestamp = datetime.now().strftime('%Y%m%d000000')
+        _timestamp = dt.datetime.now().strftime('%Y%m%d000000')
          _auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
          data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
              'X-Network-Ten-Auth': _auth_header,
@@ -94,7 +94,7 @@ def _real_extract(self, url):
              data.get('playbackApiEndpoint'), content_id, 'Downloading video JSON',
              headers=headers).get('source')
          m3u8_url = self._request_webpage(HEADRequest(
-            _video_url), content_id).geturl()
+            _video_url), content_id).url
          if '10play-not-in-oz' in m3u8_url:
              self.raise_geo_restricted(countries=['AU'])
          formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
@@ -109,10 +109,62 @@ def _real_extract(self, url):
              'description': data.get('description'),
              'age_limit': self._AUS_AGES.get(data.get('classification')),
              'series': data.get('tvShow'),
-            'season': int_or_none(data.get('season')),
+            'season_number': int_or_none(data.get('season')),
              'episode_number': int_or_none(data.get('episode')),
              'timestamp': data.get('published'),
              'thumbnail': data.get('imageUrl'),
              'uploader': 'Channel 10',
              'uploader_id': '2199827728001',
          }
+
+
+class TenPlaySeasonIE(InfoExtractor):  # playlist extractor for 10play season pages (".../<show>/episodes/<season>")
+    _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P<show>[^/?#]+)/episodes/(?P<season>[^/?#]+)/?(?:$|[?#])'
+    _TESTS = [{
+        'url': 'https://10play.com.au/masterchef/episodes/season-14',
+        'info_dict': {
+            'title': 'Season 14',
+            'id': 'MjMyOTIy',
+        },
+        'playlist_mincount': 64,
+    }, {
+        'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2022',
+        'info_dict': {
+            'title': 'Season 2022',
+            'id': 'Mjc0OTIw',
+        },
+        'playlist_mincount': 256,
+    }]
+
+    def _entries(self, load_more_url, display_id=None):  # generator: yields every episode's 'cardLink', page by page
+        skip_ids = []  # ids of all episodes received so far; sent back as 'skipIds[]' with each request
+        for page in itertools.count(1):  # unbounded counter; the loop ends via the 'hasMore' check below
+            episodes_carousel = self._download_json(
+                load_more_url, display_id, query={'skipIds[]': skip_ids},
+                note=f'Fetching episodes page {page}')
+
+            episodes_chunk = episodes_carousel['items']
+            skip_ids.extend(ep['id'] for ep in episodes_chunk)  # presumably makes the API omit these next time - TODO confirm
+
+            for ep in episodes_chunk:
+                yield ep['cardLink']
+            if not episodes_carousel['hasMore']:  # response reports no further pages
+                break
+
+    def _real_extract(self, url):  # builds the playlist result for one season URL
+        show, season = self._match_valid_url(url).group('show', 'season')
+        season_info = self._download_json(
+            f'https://10play.com.au/api/shows/{show}/episodes/{season}', f'{show}/{season}')
+
+        episodes_carousel = traverse_obj(season_info, (
+            'content', 0, 'components', (
+                lambda _, v: v['title'].lower() == 'episodes',  # component titled 'episodes' (case-insensitive)
+                (..., {dict}),  # second branch: any dict-valued component - NOTE(review): acts as fallback only if branch order is preserved
+            )), get_all=False) or {}  # get_all=False: keep only the first match
+
+        playlist_id = episodes_carousel['tpId']  # NOTE(review): raises KeyError when nothing matched, despite the `or {}` above
+
+        return self.playlist_from_matches(
+            self._entries(urljoin(url, episodes_carousel['loadMoreUrl']), playlist_id),  # loadMoreUrl is joined with the page URL
+            playlist_id, traverse_obj(season_info, ('content', 0, 'title', {str})),
+            getter=functools.partial(urljoin, url))  # presumably applied to each yielded cardLink to join it with the page URL - confirm