[yt-dlp.git] / yt_dlp / extractor / nebula.py

import itertools
import json

from .art19 import Art19IE
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    make_archive_id,
    parse_iso8601,
    smuggle_url,
    try_call,
    unsmuggle_url,
    update_url_query,
    url_or_none,
    urljoin,
)
from ..utils.traversal import traverse_obj

# Shared URL prefix for every extractor in this module: http or https scheme,
# an optional "www." or "beta." subdomain, and any of the three Nebula domains.
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'


class NebulaBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'watchnebula'
    _token = _api_token = None

    def _perform_login(self, username, password):
        """Log in with e-mail and password and remember the returned API token.

        An HTTP 400 answer from the login endpoint is reported as an expected
        "invalid username or password" error; every other download failure is
        re-raised unchanged. An ExtractorError is also raised when the response
        does not carry a string under 'key'.
        """
        credentials = json.dumps({'email': username, 'password': password}).encode()
        try:
            login_response = self._download_json(
                'https://nebula.tv/auth/login/', None,
                'Logging in to Nebula', 'Login failed',
                data=credentials, headers={'content-type': 'application/json'})
        except ExtractorError as e:
            credentials_rejected = isinstance(e.cause, HTTPError) and e.cause.status == 400
            if not credentials_rejected:
                raise
            raise ExtractorError('Login failed: Invalid username or password', expected=True)

        api_token = traverse_obj(login_response, ('key', {str}))
        self._api_token = api_token
        if not api_token:
            raise ExtractorError('Login failed: No token')

    def _call_api(self, *args, **kwargs):
        """Download JSON with the current bearer token, reauthorizing once on 401/403.

        All arguments are forwarded to ``_download_json``. When a bearer token
        is available it is written into the 'Authorization' request header.
        If the first attempt fails with HTTP 401 or 403, ``_real_initialize``
        is run again and the request is retried a single time; any other error
        (and any error from the retry) propagates to the caller.
        """
        def attach_bearer_token():
            # Only send the header when a token has actually been obtained
            if self._token:
                kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'

        attach_bearer_token()
        try:
            return self._download_json(*args, **kwargs)
        except ExtractorError as e:
            auth_rejected = isinstance(e.cause, HTTPError) and e.cause.status in (401, 403)
            if not auth_rejected:
                raise
            self.to_screen(
                f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
            self._real_initialize()
            # The token may have changed, so refresh the header before retrying
            attach_bearer_token()
            return self._download_json(*args, **kwargs)

    def _real_initialize(self):
        """Obtain a bearer token from the authorization endpoint.

        If no API token is stored yet, the 'nebula_auth.apiToken' cookie for
        https://nebula.tv is tried. The authorization request is sent with an
        empty body and, when an API token is known, a 'Token ...' Authorization
        header; the 'token' field of the response is stored as the bearer token.
        """
        if not self._api_token:
            self._api_token = try_call(
                lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)

        auth_headers = None
        if self._api_token:
            auth_headers = {'Authorization': f'Token {self._api_token}'}

        authorization = self._download_json(
            'https://users.api.nebula.app/api/v1/authorization/', None,
            headers=auth_headers, note='Authorizing to Nebula', data=b'')
        self._token = authorization['token']

    def _extract_formats(self, content_id, slug):
        """Fetch the HLS manifest of a content item and return its formats and subtitles.

        The manifest path is derived from the part of ``content_id`` before the
        first ':' (with an 's' appended) followed by the full ``content_id``.
        HTTP 401 leads to a login-required error. HTTP 403 triggers one
        reauthorization and a single retry; a second 403, or any other error,
        is re-raised.

        Returns a dict with the keys 'formats' and 'subtitles'.
        """
        content_kind = content_id.split(':')[0]
        manifest_url = f'https://content.api.nebula.app/{content_kind}s/{content_id}/manifest.m3u8'

        for attempt in range(2):
            try:
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    manifest_url, slug, 'mp4', query={
                        # Read on every attempt: reauthorizing replaces the token
                        'token': self._token,
                        'app_version': '23.10.0',
                        'platform': 'ios',
                    })
            except ExtractorError as e:
                http_status = e.cause.status if isinstance(e.cause, HTTPError) else None
                if http_status == 401:
                    self.raise_login_required()
                if http_status == 403 and attempt == 0:
                    self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
                    self._real_initialize()
                    continue
                raise
            else:
                return {'formats': formats, 'subtitles': subtitles}

    def _extract_video_metadata(self, episode):
        """Translate an API episode/lesson object into info-dict metadata.

        'id' is the part of ``episode['id']`` after the first ':'. The channel
        title is reused for 'channel', 'uploader', 'series' and 'creator', and
        the channel slug for 'channel_id' and 'uploader_id'. 'channel_url' and
        'uploader_url' are built from the first available of 'channel_slug' and
        'class_slug', and are always present in the result (possibly as None).
        """
        def to_channel_url(channel_slug):
            return urljoin('https://nebula.tv/', channel_slug)

        def legacy_archive_ids(zype_id):
            # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
            if not zype_id:
                return None
            return [
                make_archive_id(NebulaIE, zype_id),
                make_archive_id(NebulaSubscriptionsIE, zype_id),
            ]

        channel_url = traverse_obj(
            episode, (('channel_slug', 'class_slug'), {to_channel_url}), get_all=False)

        info = {'id': episode['id'].partition(':')[2]}
        info.update(traverse_obj(episode, {
            'display_id': 'slug',
            'title': 'title',
            'description': 'description',
            'timestamp': ('published_at', {parse_iso8601}),
            'duration': ('duration', {int_or_none}),
            'channel_id': 'channel_slug',
            'uploader_id': 'channel_slug',
            'channel': 'channel_title',
            'uploader': 'channel_title',
            'series': 'channel_title',
            'creator': 'channel_title',
            'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
            'episode_number': ('order', {int_or_none}),
            '_old_archive_ids': ('zype_id', {legacy_archive_ids}),
        }))
        info['channel_url'] = info['uploader_url'] = channel_url
        return info


class NebulaIE(NebulaBaseIE):
    IE_NAME = 'nebula:video'
    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
        'info_dict': {
            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
            'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'series': 'Lindsay Ellis',
            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
            'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'creator': 'Lindsay Ellis',
            'duration': 2212,
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': 'd05739cf6c38c09322422f696b569c23',
        'info_dict': {
            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering — The Logistics of D-Day',
            'channel_id': 'd-day',
            'uploader': 'Real Engineering — The Logistics of D-Day',
            'uploader_id': 'd-day',
            'series': 'Real Engineering — The Logistics of D-Day',
            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'creator': 'Real Engineering — The Logistics of D-Day',
            'duration': 841,
            'channel_url': 'https://nebula.tv/d-day',
            'uploader_url': 'https://nebula.tv/d-day',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
        'md5': 'ebe28a7ad822b9ee172387d860487868',
        'info_dict': {
            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
            'duration': 825,
            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
            'series': 'Tom Scott Presents: Money',
            'display_id': 'money-episode-1-the-draw',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'Tom Scott Presents: Money',
            '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }, {
        'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
        'info_dict': {
            'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
            'ext': 'mp4',
            'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
            'title': 'Did the US Really Blow Up the NordStream Pipelines?',
            'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
            'upload_date': '20230223',
            'timestamp': 1677144070,
            'channel': 'TLDR News EU',
            'channel_id': 'tldrnewseu',
            'uploader': 'TLDR News EU',
            'uploader_id': 'tldrnewseu',
            'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'duration': 524,
            'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'series': 'TLDR News EU',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'TLDR News EU',
            '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a single Nebula video.

        When the URL carries a smuggled content id, only the formats are
        fetched and placeholder metadata (empty title) is returned. Otherwise
        the video metadata is requested from the content API by slug and merged
        with the formats.
        """
        video_slug = self._match_id(url)
        url, smuggled_data = unsmuggle_url(url, {})

        known_id = smuggled_data.get('id')
        if known_id:
            info = {
                'id': known_id,
                'display_id': video_slug,
                'title': '',
            }
            info.update(self._extract_formats(known_id, video_slug))
            return info

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/videos/{video_slug}',
            video_slug, note='Fetching video metadata')
        info = self._extract_video_metadata(metadata)
        info.update(self._extract_formats(metadata['id'], video_slug))
        return info


class NebulaClassIE(NebulaBaseIE):
    IE_NAME = 'nebula:media'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
        'info_dict': {
            'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
            'ext': 'mp4',
            'display_id': '14',
            'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode_number': 14,
            'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
            'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'duration': 646,
            'episode': 'Episode 14',
            'title': 'Photos, Sculpture, and Video',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
        'info_dict': {
            'ext': 'mp3',
            'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
            'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
            'series_id': '335e8159-d663-491a-888f-1732285706ac',
            'modified_timestamp': 1599091504,
            'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
            'series': 'Extremities',
            'modified_date': '20200903',
            'upload_date': '20200902',
            'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
            'release_timestamp': 1571237958,
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'duration': 1546.05714,
            'timestamp': 1599085608,
            'release_date': '20191016',
        },
    }, {
        'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
        'info_dict': {
            'ext': 'mp3',
            'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
            'episode_number': 1,
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'release_date': '20230304',
            'modified_date': '20230403',
            'series': 'The Layover',
            'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
            'modified_timestamp': 1680554566,
            'duration': 3130.46401,
            'release_timestamp': 1677943800,
            'title': 'The Layover — Episode 1',
            'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
            'upload_date': '20230303',
            'episode': 'Episode 1',
            'timestamp': 1677883672,
            'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
        },
    }]

    def _real_extract(self, url):
        """Extract a class lesson or a podcast episode.

        When the URL carries a smuggled content id, only the formats are
        fetched and placeholder metadata (empty title) is returned. Otherwise
        the content API is queried and the result is handled by its 'type':

        * 'lesson': video metadata merged with the HLS formats.
        * 'podcast_episode': delegated to Art19IE when the episode URL is an
          Art19 URL, otherwise returned as a direct-URL info dict.

        Raises ExtractorError for any other content type, and when a podcast
        episode has no episode URL (a login-required error if the episode is
        flagged as premium).
        """
        slug, episode = self._match_valid_url(url).group('id', 'ep')
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('id'):
            return {
                'id': smuggled_data['id'],
                'display_id': slug,
                'title': '',
                **self._extract_formats(smuggled_data['id'], slug),
            }

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
            slug, note='Fetching class/podcast metadata')
        content_type = metadata.get('type')
        if content_type == 'lesson':
            return {
                **self._extract_video_metadata(metadata),
                **self._extract_formats(metadata['id'], slug),
            }
        elif content_type == 'podcast_episode':
            # Use .get() so that a missing key is treated like a null URL
            # instead of surfacing as a bare KeyError
            episode_url = metadata.get('episode_url')
            if not episode_url:
                if metadata.get('premium'):
                    self.raise_login_required()
                # Without this guard a null URL would reach Art19IE.suitable()
                # and fail with an unrelated TypeError
                raise ExtractorError('Unable to find podcast episode URL')

            if Art19IE.suitable(episode_url):
                return self.url_result(episode_url, Art19IE)
            return traverse_obj(metadata, {
                'id': ('id', {str}),
                'url': ('episode_url', {url_or_none}),
                'title': ('title', {str}),
                'description': ('description', {str}),
                'timestamp': ('published_at', {parse_iso8601}),
                'duration': ('duration', {int_or_none}),
                'channel_id': ('channel_id', {str}),
                'channel': ('channel_title', {str}),
                'thumbnail': ('assets', 'regular', {url_or_none}),
            })

        raise ExtractorError(f'Unexpected content type {content_type!r}')


class NebulaSubscriptionsIE(NebulaBaseIE):
    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        """Yield url_transparent video results for the followed-channels feed.

        Pages are requested newest-first and followed through the 'next' link
        of each response until no further link is given. Each entry smuggles
        the episode id into the video URL, and carries the metadata already
        present in the listing.
        """
        page_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
            'following': 'true',
            'include': 'engagement',
            'ordering': '-published_at',
        })
        page_num = 0
        while page_url:
            page_num += 1
            page = self._call_api(
                page_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
            for episode in page['results']:
                metadata = self._extract_video_metadata(episode)
                video_url = f'https://nebula.tv/videos/{metadata["display_id"]}'
                yield self.url_result(
                    smuggle_url(video_url, {'id': episode['id']}),
                    NebulaIE, url_transparent=True, **metadata)
            page_url = page.get('next')

    def _real_extract(self, url):
        """Return the followed-channels feed as a playlist with the fixed id 'myshows'."""
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')


class NebulaChannelIE(NebulaBaseIE):
    IE_NAME = 'nebula:channel'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nebula.tv/johnnyharris',
        'info_dict': {
            'id': 'johnnyharris',
            'title': 'Johnny Harris',
            'description': 'I make videos about maps and many other things.',
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://nebula.tv/copyright-for-fun-and-profit',
        'info_dict': {
            'id': 'copyright-for-fun-and-profit',
            'title': 'Copyright for Fun and Profit',
            'description': 'md5:6690248223eed044a9f11cd5a24f9742',
        },
        'playlist_count': 23,
    }, {
        'url': 'https://nebula.tv/trussissuespodcast',
        'info_dict': {
            'id': 'trussissuespodcast',
            'title': 'The TLDR News Podcast',
            'description': 'md5:a08c4483bc0b705881d3e0199e721385',
        },
        'playlist_mincount': 80,
    }]

    def _generate_playlist_entries(self, collection_id, collection_slug):
        """Yield url_transparent video results for every episode of a video channel.

        Pages are requested newest-first and followed through the 'next' link
        of each response until no further link is given. The episode's
        'share_url' is preferred; otherwise the video URL is built from its slug.
        """
        page_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
        page_num = 0
        while page_url:
            page_num += 1
            page = self._call_api(page_url, collection_slug, note=f'Retrieving channel page {page_num}')
            for episode in page['results']:
                metadata = self._extract_video_metadata(episode)
                video_url = episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}'
                yield self.url_result(
                    smuggle_url(video_url, {'id': episode['id']}),
                    NebulaIE, url_transparent=True, **metadata)
            page_url = page.get('next')

    def _generate_class_entries(self, channel):
        """Yield url_transparent lesson results for every lesson of a class.

        The lesson's 'share_url' is preferred. When it is absent, the URL is
        built from the lesson's own 'class_slug' and 'slug'. Each entry smuggles
        the lesson id into the URL and carries the metadata already present in
        the class listing.
        """
        for lesson in channel['lessons']:
            metadata = self._extract_video_metadata(lesson)
            # The fallback must read the raw lesson object: the extracted
            # metadata has no 'class_slug' or 'slug' keys (the slug is stored
            # as 'display_id'), so indexing it there raised KeyError.
            lesson_url = lesson.get('share_url') or f'https://nebula.tv/{lesson["class_slug"]}/{lesson["slug"]}'
            yield self.url_result(
                smuggle_url(lesson_url, {'id': lesson['id']}),
                NebulaClassIE, url_transparent=True, **metadata)

    def _generate_podcast_entries(self, collection_id, collection_slug):
        """Yield results for every podcast episode that has a valid 'share_url'.

        Pages are requested newest-first and followed through the 'next' link
        of each response until no further link is given. Episodes are handed
        to NebulaClassIE by URL without any additional metadata.
        """
        page_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
        page_num = 0
        while page_url:
            page_num += 1
            page = self._call_api(page_url, collection_slug, note=f'Retrieving podcast page {page_num}')

            # Keep only the results whose share_url is a usable URL
            shareable_episodes = traverse_obj(page, ('results', lambda _, v: url_or_none(v['share_url'])))
            for podcast_episode in shareable_episodes:
                yield self.url_result(podcast_episode['share_url'], NebulaClassIE)
            page_url = page.get('next')

    def _real_extract(self, url):
        """Return a channel, class or podcast as a playlist.

        The collection is looked up by its slug. The entry generator is picked
        from the 'type' of the response: 'class' uses the lessons embedded in
        the response, 'podcast_channel' pages through podcast episodes, and
        anything else is treated as a regular video channel.
        """
        collection_slug = self._match_id(url)
        channel = self._call_api(
            f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
            collection_slug, note='Retrieving channel')

        channel_type = channel.get('type')
        if channel_type == 'class':
            entries = self._generate_class_entries(channel)
        elif channel_type == 'podcast_channel':
            entries = self._generate_podcast_entries(channel['id'], collection_slug)
        else:
            entries = self._generate_playlist_entries(channel['id'], collection_slug)

        playlist_title = channel.get('title')
        playlist_description = channel.get('description')
        return self.playlist_result(
            entries=entries,
            playlist_id=collection_slug,
            playlist_title=playlist_title,
            playlist_description=playlist_description)
Commit	Line	Data
359df0fc	1	import itertools
bdc196a4	2	import json
bdc196a4	3
0de09c5b	4	from .art19 import Art19IE
359df0fc	5	from .common import InfoExtractor
3d2623a8	6	from ..networking.exceptions import HTTPError
45d82be6	7	from ..utils import (
	8	ExtractorError,
	9	int_or_none,
	10	make_archive_id,
	11	parse_iso8601,
	12	smuggle_url,
	13	try_call,
	14	unsmuggle_url,
	15	update_url_query,
	16	url_or_none,
	17	urljoin,
	18	)
	19	from ..utils.traversal import traverse_obj
359df0fc	20
cbfe2e5c	21	_BASE_URL_RE = r'https?://(?:www\.\|beta\.)?(?:watchnebula\.com\|nebula\.app\|nebula\.tv)'
4cca2eb1	22
359df0fc HH	23
	24	class NebulaBaseIE(InfoExtractor):
	25	_NETRC_MACHINE = 'watchnebula'
45d82be6	26	_token = _api_token = None
359df0fc	27
45d82be6	28	def _perform_login(self, username, password):
	29	try:
	30	response = self._download_json(
	31	'https://nebula.tv/auth/login/', None,
	32	'Logging in to Nebula', 'Login failed',
	33	data=json.dumps({'email': username, 'password': password}).encode(),
	34	headers={'content-type': 'application/json'})
	35	except ExtractorError as e:
	36	if isinstance(e.cause, HTTPError) and e.cause.status == 400:
	37	raise ExtractorError('Login failed: Invalid username or password', expected=True)
	38	raise
	39	self._api_token = traverse_obj(response, ('key', {str}))
	40	if not self._api_token:
	41	raise ExtractorError('Login failed: No token')
359df0fc	42
45d82be6	43	def _call_api(self, args, *kwargs):
	44	if self._token:
	45	kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
359df0fc	46	try:
45d82be6	47	return self._download_json(args, *kwargs)
	48	except ExtractorError as e:
	49	if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
359df0fc	50	raise
45d82be6	51	self.to_screen(
	52	f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
	53	self._real_initialize()
	54	if self._token:
	55	kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
	56	return self._download_json(args, *kwargs)
359df0fc	57
45d82be6	58	def _real_initialize(self):
	59	if not self._api_token:
	60	self._api_token = try_call(
	61	lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
	62	self._token = self._download_json(
	63	'https://users.api.nebula.app/api/v1/authorization/', None,
	64	headers={'Authorization': f'Token {self._api_token}'} if self._api_token else None,
	65	note='Authorizing to Nebula', data=b'')['token']
bdc196a4	66
45d82be6	67	def _extract_formats(self, content_id, slug):
	68	for retry in (False, True):
	69	try:
	70	fmts, subs = self._extract_m3u8_formats_and_subtitles(
	71	f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8',
	72	slug, 'mp4', query={
	73	'token': self._token,
	74	'app_version': '23.10.0',
	75	'platform': 'ios',
	76	})
	77	return {'formats': fmts, 'subtitles': subs}
	78	except ExtractorError as e:
	79	if isinstance(e.cause, HTTPError) and e.cause.status == 401:
	80	self.raise_login_required()
	81	if not retry and isinstance(e.cause, HTTPError) and e.cause.status == 403:
	82	self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
	83	self._real_initialize()
	84	continue
	85	raise
359df0fc	86
45d82be6	87	def _extract_video_metadata(self, episode):
	88	channel_url = traverse_obj(
	89	episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
359df0fc	90	return {
45d82be6	91	'id': episode['id'].partition(':')[2],
	92	**traverse_obj(episode, {
	93	'display_id': 'slug',
	94	'title': 'title',
	95	'description': 'description',
	96	'timestamp': ('published_at', {parse_iso8601}),
	97	'duration': ('duration', {int_or_none}),
	98	'channel_id': 'channel_slug',
	99	'uploader_id': 'channel_slug',
	100	'channel': 'channel_title',
	101	'uploader': 'channel_title',
	102	'series': 'channel_title',
	103	'creator': 'channel_title',
	104	'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
	105	'episode_number': ('order', {int_or_none}),
	106	# Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
	107	'_old_archive_ids': ('zype_id', {lambda x: [
	108	make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
	109	}),
	110	'channel_url': channel_url,
	111	'uploader_url': channel_url,
359df0fc HH	112	}
359df0fc HH	113
359df0fc HH	114
359df0fc HH	115	class NebulaIE(NebulaBaseIE):
0de09c5b	116	IE_NAME = 'nebula:video'
0de09c5b	117	_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
45d82be6	118	_TESTS = [{
	119	'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
	120	'info_dict': {
	121	'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
	122	'ext': 'mp4',
	123	'title': 'That Time Disney Remade Beauty and the Beast',
	124	'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
	125	'upload_date': '20180731',
	126	'timestamp': 1533009600,
	127	'channel': 'Lindsay Ellis',
	128	'channel_id': 'lindsayellis',
	129	'uploader': 'Lindsay Ellis',
	130	'uploader_id': 'lindsayellis',
	131	'uploader_url': r're:https://nebula\.(tv\|app)/lindsayellis',
	132	'series': 'Lindsay Ellis',
	133	'display_id': 'that-time-disney-remade-beauty-and-the-beast',
	134	'channel_url': r're:https://nebula\.(tv\|app)/lindsayellis',
	135	'creator': 'Lindsay Ellis',
	136	'duration': 2212,
	137	'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
	138	'_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
bdc196a4	139	},
45d82be6	140	'params': {'skip_download': 'm3u8'},
	141	}, {
	142	'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
	143	'md5': 'd05739cf6c38c09322422f696b569c23',
	144	'info_dict': {
	145	'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
	146	'ext': 'mp4',
	147	'title': 'Landing Craft - How The Allies Got Ashore',
	148	'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
	149	'upload_date': '20200327',
	150	'timestamp': 1585348140,
	151	'channel': 'Real Engineering — The Logistics of D-Day',
	152	'channel_id': 'd-day',
	153	'uploader': 'Real Engineering — The Logistics of D-Day',
	154	'uploader_id': 'd-day',
	155	'series': 'Real Engineering — The Logistics of D-Day',
	156	'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
	157	'creator': 'Real Engineering — The Logistics of D-Day',
	158	'duration': 841,
	159	'channel_url': 'https://nebula.tv/d-day',
	160	'uploader_url': 'https://nebula.tv/d-day',
	161	'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
	162	'_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
bdc196a4	163	},
45d82be6	164	'params': {'skip_download': 'm3u8'},
	165	}, {
	166	'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
	167	'md5': 'ebe28a7ad822b9ee172387d860487868',
	168	'info_dict': {
	169	'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
	170	'ext': 'mp4',
	171	'title': 'Episode 1: The Draw',
	172	'description': r'contains:There’s free money on offer… if the players can all work together.',
	173	'upload_date': '20200323',
	174	'timestamp': 1584980400,
	175	'channel': 'Tom Scott Presents: Money',
	176	'channel_id': 'tom-scott-presents-money',
	177	'uploader': 'Tom Scott Presents: Money',
	178	'uploader_id': 'tom-scott-presents-money',
	179	'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
	180	'duration': 825,
	181	'channel_url': 'https://nebula.tv/tom-scott-presents-money',
	182	'series': 'Tom Scott Presents: Money',
	183	'display_id': 'money-episode-1-the-draw',
	184	'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
	185	'creator': 'Tom Scott Presents: Money',
	186	'_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
bdc196a4	187	},
45d82be6	188	'params': {'skip_download': 'm3u8'},
	189	}, {
	190	'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
	191	'only_matching': True,
	192	}, {
	193	'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
	194	'info_dict': {
	195	'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
	196	'ext': 'mp4',
	197	'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
	198	'title': 'Did the US Really Blow Up the NordStream Pipelines?',
	199	'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
	200	'upload_date': '20230223',
	201	'timestamp': 1677144070,
	202	'channel': 'TLDR News EU',
	203	'channel_id': 'tldrnewseu',
	204	'uploader': 'TLDR News EU',
	205	'uploader_id': 'tldrnewseu',
	206	'uploader_url': r're:https://nebula\.(tv\|app)/tldrnewseu',
	207	'duration': 524,
	208	'channel_url': r're:https://nebula\.(tv\|app)/tldrnewseu',
	209	'series': 'TLDR News EU',
	210	'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
	211	'creator': 'TLDR News EU',
	212	'_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
cbfe2e5c	213	},
45d82be6	214	'params': {'skip_download': 'm3u8'},
	215	}, {
	216	'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
	217	'only_matching': True,
	218	}]
bdc196a4	219
359df0fc HH	220	def _real_extract(self, url):
359df0fc HH	221	slug = self._match_id(url)
45d82be6	222	url, smuggled_data = unsmuggle_url(url, {})
	223	if smuggled_data.get('id'):
	224	return {
	225	'id': smuggled_data['id'],
	226	'display_id': slug,
	227	'title': '',
	228	**self._extract_formats(smuggled_data['id'], slug),
	229	}
	230
	231	metadata = self._call_api(
	232	f'https://content.api.nebula.app/content/videos/{slug}',
	233	slug, note='Fetching video metadata')
	234	return {
	235	**self._extract_video_metadata(metadata),
	236	**self._extract_formats(metadata['id'], slug),
	237	}
	238
	239
	240	class NebulaClassIE(NebulaBaseIE):
0de09c5b	241	IE_NAME = 'nebula:media'
0de09c5b	242	_VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows\|library\|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$\|[?#])'
45d82be6	243	_TESTS = [{
	244	'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
	245	'info_dict': {
	246	'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
	247	'ext': 'mp4',
	248	'display_id': '14',
	249	'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
	250	'episode_number': 14,
	251	'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
	252	'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
	253	'duration': 646,
	254	'episode': 'Episode 14',
	255	'title': 'Photos, Sculpture, and Video',
	256	},
	257	'params': {'skip_download': 'm3u8'},
0de09c5b	258	}, {
	259	'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
	260	'info_dict': {
	261	'ext': 'mp3',
	262	'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
	263	'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
	264	'series_id': '335e8159-d663-491a-888f-1732285706ac',
	265	'modified_timestamp': 1599091504,
	266	'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
	267	'series': 'Extremities',
	268	'modified_date': '20200903',
	269	'upload_date': '20200902',
	270	'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
	271	'release_timestamp': 1571237958,
	272	'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
	273	'duration': 1546.05714,
	274	'timestamp': 1599085608,
	275	'release_date': '20191016',
	276	},
	277	}, {
	278	'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
	279	'info_dict': {
	280	'ext': 'mp3',
	281	'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
	282	'episode_number': 1,
	283	'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
	284	'release_date': '20230304',
	285	'modified_date': '20230403',
	286	'series': 'The Layover',
	287	'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
	288	'modified_timestamp': 1680554566,
	289	'duration': 3130.46401,
	290	'release_timestamp': 1677943800,
	291	'title': 'The Layover — Episode 1',
	292	'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
	293	'upload_date': '20230303',
	294	'episode': 'Episode 1',
	295	'timestamp': 1677883672,
	296	'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
	297	},
45d82be6	298	}]
	299
	300	def _real_extract(self, url):
	301	slug, episode = self._match_valid_url(url).group('id', 'ep')
	302	url, smuggled_data = unsmuggle_url(url, {})
	303	if smuggled_data.get('id'):
	304	return {
	305	'id': smuggled_data['id'],
	306	'display_id': slug,
	307	'title': '',
	308	**self._extract_formats(smuggled_data['id'], slug),
	309	}
	310
	311	metadata = self._call_api(
	312	f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
0de09c5b	313	slug, note='Fetching class/podcast metadata')
	314	content_type = metadata.get('type')
	315	if content_type == 'lesson':
	316	return {
	317	**self._extract_video_metadata(metadata),
	318	**self._extract_formats(metadata['id'], slug),
	319	}
	320	elif content_type == 'podcast_episode':
	321	episode_url = metadata['episode_url']
	322	if not episode_url and metadata.get('premium'):
	323	self.raise_login_required()
	324
	325	if Art19IE.suitable(episode_url):
	326	return self.url_result(episode_url, Art19IE)
	327	return traverse_obj(metadata, {
	328	'id': ('id', {str}),
	329	'url': ('episode_url', {url_or_none}),
	330	'title': ('title', {str}),
	331	'description': ('description', {str}),
	332	'timestamp': ('published_at', {parse_iso8601}),
	333	'duration': ('duration', {int_or_none}),
	334	'channel_id': ('channel_id', {str}),
	335	'chnanel': ('channel_title', {str}),
	336	'thumbnail': ('assets', 'regular', {url_or_none}),
	337	})
	338
	339	raise ExtractorError(f'Unexpected content type {content_type!r}')
bdc196a4	340
bdc196a4	341
class NebulaSubscriptionsIE(NebulaBaseIE):
    """Playlist extractor for the ``/myshows`` and ``/library/latest-videos`` pages."""

    IE_NAME = 'nebula:subscriptions'
    # Alternation uses a bare ``|``; an escaped ``\|`` would be a literal pipe
    # and the pattern would not match the test URL below.
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        """Yield one url_transparent ``NebulaIE`` result per followed episode.

        Pages are requested one after another, following the ``next`` link of
        each response until it is absent or empty.
        """
        next_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
            'following': 'true',
            'include': 'engagement',
            'ordering': '-published_at',
        })
        for page_num in itertools.count(1):
            page = self._call_api(
                next_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
            for episode in page['results']:
                metadata = self._extract_video_metadata(episode)
                # The episode id is smuggled into the URL so that NebulaIE
                # can skip its own metadata request.
                yield self.url_result(smuggle_url(
                    f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
            next_url = page.get('next')
            if not next_url:
                return

    def _real_extract(self, url):
        """Return the subscriptions feed as a playlist with the fixed id ``myshows``."""
        return self.playlist_result(self._generate_playlist_entries(), 'myshows')
	373
	374
	375	class NebulaChannelIE(NebulaBaseIE):
	376	IE_NAME = 'nebula:channel'
0de09c5b	377	_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows\|library\|videos)(?P<id>[\w-]+)/?(?:$\|[?#])'
45d82be6	378	_TESTS = [{
	379	'url': 'https://nebula.tv/tom-scott-presents-money',
	380	'info_dict': {
	381	'id': 'tom-scott-presents-money',
	382	'title': 'Tom Scott Presents: Money',
	383	'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
	384	},
	385	'playlist_count': 5,
	386	}, {
	387	'url': 'https://nebula.tv/lindsayellis',
	388	'info_dict': {
	389	'id': 'lindsayellis',
	390	'title': 'Lindsay Ellis',
	391	'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
	392	},
	393	'playlist_mincount': 2,
	394	}, {
	395	'url': 'https://nebula.tv/johnnyharris',
	396	'info_dict': {
	397	'id': 'johnnyharris',
	398	'title': 'Johnny Harris',
	399	'description': 'I make videos about maps and many other things.',
359df0fc	400	},
45d82be6	401	'playlist_mincount': 90,
	402	}, {
	403	'url': 'https://nebula.tv/copyright-for-fun-and-profit',
	404	'info_dict': {
	405	'id': 'copyright-for-fun-and-profit',
	406	'title': 'Copyright for Fun and Profit',
	407	'description': 'md5:6690248223eed044a9f11cd5a24f9742',
	408	},
	409	'playlist_count': 23,
0de09c5b	410	}, {
	411	'url': 'https://nebula.tv/trussissuespodcast',
	412	'info_dict': {
	413	'id': 'trussissuespodcast',
	414	'title': 'The TLDR News Podcast',
	415	'description': 'md5:a08c4483bc0b705881d3e0199e721385',
	416	},
	417	'playlist_mincount': 80,
45d82be6	418	}]
bdc196a4	419
45d82be6	420	def _generate_playlist_entries(self, collection_id, collection_slug):
	421	next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
	422	for page_num in itertools.count(1):
	423	episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
	424	for episode in episodes['results']:
	425	metadata = self._extract_video_metadata(episode)
	426	yield self.url_result(smuggle_url(
	427	episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
	428	{'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
	429	next_url = episodes.get('next')
359df0fc HH	430	if not next_url:
359df0fc HH	431	break
45d82be6	432
	433	def _generate_class_entries(self, channel):
	434	for lesson in channel['lessons']:
	435	metadata = self._extract_video_metadata(lesson)
	436	yield self.url_result(smuggle_url(
	437	lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
	438	{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
bdc196a4	439
0de09c5b	440	def _generate_podcast_entries(self, collection_id, collection_slug):
	441	next_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
	442	for page_num in itertools.count(1):
	443	episodes = self._call_api(next_url, collection_slug, note=f'Retrieving podcast page {page_num}')
	444
	445	for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
	446	yield self.url_result(episode['share_url'], NebulaClassIE)
	447	next_url = episodes.get('next')
	448	if not next_url:
	449	break
	450
bdc196a4	451	def _real_extract(self, url):
45d82be6	452	collection_slug = self._match_id(url)
	453	channel = self._call_api(
	454	f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
	455	collection_slug, note='Retrieving channel')
	456
	457	if channel.get('type') == 'class':
	458	entries = self._generate_class_entries(channel)
0de09c5b	459	elif channel.get('type') == 'podcast_channel':
0de09c5b	460	entries = self._generate_podcast_entries(channel['id'], collection_slug)
45d82be6	461	else:
45d82be6	462	entries = self._generate_playlist_entries(channel['id'], collection_slug)
bdc196a4	463
359df0fc	464	return self.playlist_result(
45d82be6	465	entries=entries,
	466	playlist_id=collection_slug,
	467	playlist_title=channel.get('title'),
	468	playlist_description=channel.get('description'))