- preload_state = self._parse_json(self._search_regex(
- r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
- 'preload state', default='{}'), playlist_id, fatal=False)
- if preload_state:
- current_programme = preload_state.get('programmes', {}).get('current') or {}
- programme_id = current_programme.get('id')
- if current_programme and programme_id and current_programme.get('type') == 'playable_item':
- title = current_programme.get('titles', {}).get('tertiary') or playlist_title
- formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
- synopses = current_programme.get('synopses') or {}
- network = current_programme.get('network') or {}
- duration = int_or_none(
- current_programme.get('duration', {}).get('value'))
- thumbnail = None
- image_url = current_programme.get('image_url')
- if image_url:
- thumbnail = image_url.replace('{recipe}', 'raw')
+ # various PRELOADED_STATE JSON
+ preload_state = self._search_json(
+ r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
+ 'preload state', playlist_id, transform_source=js_to_json, default={})
+ # PRELOADED_STATE with current programme
+ current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
+ programme_id = traverse_obj(current_programme, ('id', {str}))
+ if programme_id and current_programme.get('type') == 'playable_item':
+ title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
+ formats, subtitles = self._download_media_selector(programme_id)
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'formats': formats,
+ **traverse_obj(current_programme, {
+ 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
+ 'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
+ 'duration': ('duration', 'value', {int_or_none}),
+ 'uploader': ('network', 'short_title', {str}),
+ 'uploader_id': ('network', 'id', {str}),
+ 'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
+ 'series': ('titles', 'primary', {str}),
+ }),
+ 'subtitles': subtitles,
+ 'chapters': traverse_obj(preload_state, (
+ 'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
+ 'title': ('titles', {lambda x: join_nonempty(
+ 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
+ 'start_time': ('offset', 'start', {float_or_none}),
+ 'end_time': ('offset', 'end', {float_or_none}),
+ }),
+ ),
+ }
+
+ # PWA_PRELOADED_STATE with article video asset
+ asset_id = traverse_obj(preload_state, (
+ 'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
+ 'assetVideo', 0, {str}, any))
+ if asset_id:
+ video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
+ if video_id:
+ article = traverse_obj(preload_state, (
+ 'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
+
+ def image_url(image_id):
+ return traverse_obj(preload_state, (
+ 'entities', 'images', image_id, 'url',
+ {lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
+
+ formats, subtitles = self._download_media_selector(video_id)