[yt-dlp.git] / yt_dlp / extractor / nebula.py

import itertools
import json

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    make_archive_id,
    parse_iso8601,
    smuggle_url,
    try_call,
    unsmuggle_url,
    update_url_query,
    url_or_none,
    urljoin,
)
from ..utils.traversal import traverse_obj

# Scheme + host prefix shared by every _VALID_URL below: accepts http/https,
# an optional "www." or "beta." subdomain, and any of the three Nebula
# domains (watchnebula.com, nebula.app, nebula.tv).
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'


class NebulaBaseIE(InfoExtractor):
    """Shared plumbing for the Nebula extractors.

    Provides the login flow, the token exchange, an API wrapper that
    re-authorizes once on HTTP 401/403, and helpers that turn API responses
    into formats and metadata dicts.
    """

    _NETRC_MACHINE = 'watchnebula'
    # _api_token: key returned by the login endpoint or read from the site cookie.
    # _token: value returned by the authorization endpoint; sent as the Bearer
    # credential on API calls and as the "token" query value for manifests.
    _token = _api_token = None

    def _perform_login(self, username, password):
        """Log in with email/password and remember the returned API key.

        Raises ExtractorError (expected) on HTTP 400, and a plain
        ExtractorError when the response carries no string ``key``.
        """
        credentials = json.dumps({'email': username, 'password': password}).encode()
        try:
            response = self._download_json(
                'https://nebula.tv/auth/login/', None,
                'Logging in to Nebula', 'Login failed',
                data=credentials,
                headers={'content-type': 'application/json'})
        except ExtractorError as err:
            rejected = isinstance(err.cause, HTTPError) and err.cause.status == 400
            if not rejected:
                raise
            raise ExtractorError('Login failed: Invalid username or password', expected=True)

        api_token = traverse_obj(response, ('key', {str}))
        self._api_token = api_token
        if not api_token:
            raise ExtractorError('Login failed: No token')

    def _call_api(self, *args, **kwargs):
        """Call ``_download_json`` with the Bearer token attached.

        On an HTTP 401 or 403 the token is refreshed via ``_real_initialize``
        and the request is repeated exactly once; any other failure propagates.
        """
        def attach_bearer():
            # Only send the header once a token has actually been obtained
            if self._token:
                kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'

        attach_bearer()
        try:
            return self._download_json(*args, **kwargs)
        except ExtractorError as err:
            status = err.cause.status if isinstance(err.cause, HTTPError) else None
            if status not in (401, 403):
                raise
            self.to_screen(
                f'Reauthorizing with Nebula and retrying, because last API call resulted in error {status}')
            self._real_initialize()
            attach_bearer()
            return self._download_json(*args, **kwargs)

    def _real_initialize(self):
        """Exchange the API key (if any) for the token used on later requests."""
        if not self._api_token:
            # No login happened: fall back to the cookie, or None if it is absent
            self._api_token = try_call(
                lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)

        auth_headers = None
        if self._api_token:
            auth_headers = {'Authorization': f'Token {self._api_token}'}

        authorization = self._download_json(
            'https://users.api.nebula.app/api/v1/authorization/', None,
            headers=auth_headers, note='Authorizing to Nebula', data=b'')
        self._token = authorization['token']

    def _extract_formats(self, content_id, slug):
        """Fetch the HLS manifest for ``content_id`` and return formats/subtitles.

        A 401 is reported as "login required"; a 403 triggers one
        re-authorization followed by a single retry.
        """
        # The text before the first ":" of the content id, pluralised, is the API path segment
        manifest_url = (
            f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8')

        for already_retried in (False, True):
            try:
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    manifest_url, slug, 'mp4', query={
                        # Read on every attempt so a refreshed token is picked up
                        'token': self._token,
                        'app_version': '23.10.0',
                        'platform': 'ios',
                    })
            except ExtractorError as err:
                status = err.cause.status if isinstance(err.cause, HTTPError) else None
                if status == 401:
                    self.raise_login_required()
                if status == 403 and not already_retried:
                    self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
                    self._real_initialize()
                    continue
                raise
            else:
                return {'formats': formats, 'subtitles': subtitles}

    def _extract_video_metadata(self, episode):
        """Build the metadata part of an info dict from an API episode/lesson dict."""
        # get_all=False: take the first of channel_slug / class_slug that produces a URL
        channel_url = traverse_obj(
            episode,
            (('channel_slug', 'class_slug'), {lambda slug: urljoin('https://nebula.tv/', slug)}),
            get_all=False)

        def legacy_archive_ids(zype_id):
            # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
            if not zype_id:
                return None
            return [
                make_archive_id(NebulaIE, zype_id),
                make_archive_id(NebulaSubscriptionsIE, zype_id),
            ]

        # The id keeps only what follows the first ":" of the API id
        info = {'id': episode['id'].partition(':')[2]}
        info.update(traverse_obj(episode, {
            'display_id': 'slug',
            'title': 'title',
            'description': 'description',
            'timestamp': ('published_at', {parse_iso8601}),
            'duration': ('duration', {int_or_none}),
            'channel_id': 'channel_slug',
            'uploader_id': 'channel_slug',
            'channel': 'channel_title',
            'uploader': 'channel_title',
            'series': 'channel_title',
            'creator': 'channel_title',
            'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
            'episode_number': ('order', {int_or_none}),
            '_old_archive_ids': ('zype_id', {legacy_archive_ids}),
        }))
        info['channel_url'] = channel_url
        info['uploader_url'] = channel_url
        return info


class NebulaIE(NebulaBaseIE):
    """Extractor for single Nebula videos (``/videos/<slug>`` URLs)."""

    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
        'info_dict': {
            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
            'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'series': 'Lindsay Ellis',
            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
            'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'creator': 'Lindsay Ellis',
            'duration': 2212,
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': 'd05739cf6c38c09322422f696b569c23',
        'info_dict': {
            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering — The Logistics of D-Day',
            'channel_id': 'd-day',
            'uploader': 'Real Engineering — The Logistics of D-Day',
            'uploader_id': 'd-day',
            'series': 'Real Engineering — The Logistics of D-Day',
            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'creator': 'Real Engineering — The Logistics of D-Day',
            'duration': 841,
            'channel_url': 'https://nebula.tv/d-day',
            'uploader_url': 'https://nebula.tv/d-day',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
        'md5': 'ebe28a7ad822b9ee172387d860487868',
        'info_dict': {
            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
            'duration': 825,
            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
            'series': 'Tom Scott Presents: Money',
            'display_id': 'money-episode-1-the-draw',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'Tom Scott Presents: Money',
            '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }, {
        'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
        'info_dict': {
            'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
            'ext': 'mp4',
            'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
            'title': 'Did the US Really Blow Up the NordStream Pipelines?',
            'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
            'upload_date': '20230223',
            'timestamp': 1677144070,
            'channel': 'TLDR News EU',
            'channel_id': 'tldrnewseu',
            'uploader': 'TLDR News EU',
            'uploader_id': 'tldrnewseu',
            'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'duration': 524,
            'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'series': 'TLDR News EU',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'TLDR News EU',
            '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for one video.

        When the URL carries a smuggled content id (as the playlist
        extractors in this file attach), only formats are fetched and the
        metadata request is skipped.
        """
        slug = self._match_id(url)
        _, smuggled_data = unsmuggle_url(url, {})

        content_id = smuggled_data.get('id')
        if content_id:
            # Placeholder title; the url_transparent playlist entry carries the metadata
            stub = {
                'id': content_id,
                'display_id': slug,
                'title': '',
            }
            stub.update(self._extract_formats(content_id, slug))
            return stub

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/videos/{slug}',
            slug, note='Fetching video metadata')
        info = self._extract_video_metadata(metadata)
        info.update(self._extract_formats(metadata['id'], slug))
        return info


class NebulaClassIE(NebulaBaseIE):
    """Extractor for a single lesson of a Nebula class (``/<class-slug>/<number>``)."""

    IE_NAME = 'nebula:class'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
    _TESTS = [{
        'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
        'info_dict': {
            'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
            'ext': 'mp4',
            'display_id': '14',
            'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode_number': 14,
            'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
            'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'duration': 646,
            'episode': 'Episode 14',
            'title': 'Photos, Sculpture, and Video',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for one class lesson.

        A smuggled content id short-circuits the metadata request; in that
        case only formats are fetched.
        """
        mobj = self._match_valid_url(url)
        slug = mobj.group('id')
        episode = mobj.group('ep')
        _, smuggled_data = unsmuggle_url(url, {})

        content_id = smuggled_data.get('id')
        if content_id:
            # Placeholder title; the url_transparent playlist entry carries the metadata
            stub = {
                'id': content_id,
                'display_id': slug,
                'title': '',
            }
            stub.update(self._extract_formats(content_id, slug))
            return stub

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
            slug, note='Fetching video metadata')
        info = self._extract_video_metadata(metadata)
        info.update(self._extract_formats(metadata['id'], slug))
        return info


class NebulaSubscriptionsIE(NebulaBaseIE):
    """Playlist extractor for the logged-in user's feed of followed channels."""

    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        """Yield url_transparent entries, following the API's ``next`` links."""
        page_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
            'following': 'true',
            'include': 'engagement',
            'ordering': '-published_at',
        })
        page_num = 0
        # Stops once a page no longer advertises a "next" URL
        while page_url:
            page_num += 1
            page = self._call_api(
                page_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
            for episode in page['results']:
                metadata = self._extract_video_metadata(episode)
                # Smuggle the full API id so NebulaIE can skip its metadata request
                video_url = smuggle_url(
                    f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']})
                yield self.url_result(video_url, NebulaIE, url_transparent=True, **metadata)
            page_url = page.get('next')

    def _real_extract(self, url):
        """Return the subscriptions feed as a playlist with the fixed id ``myshows``."""
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')


class NebulaChannelIE(NebulaBaseIE):
    """Playlist extractor for a Nebula channel or class landing page."""

    IE_NAME = 'nebula:channel'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nebula.tv/johnnyharris',
        'info_dict': {
            'id': 'johnnyharris',
            'title': 'Johnny Harris',
            'description': 'I make videos about maps and many other things.',
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://nebula.tv/copyright-for-fun-and-profit',
        'info_dict': {
            'id': 'copyright-for-fun-and-profit',
            'title': 'Copyright for Fun and Profit',
            'description': 'md5:6690248223eed044a9f11cd5a24f9742',
        },
        'playlist_count': 23,
    }]

    def _generate_playlist_entries(self, collection_id, collection_slug):
        """Yield url_transparent entries for every episode of a regular channel.

        Pages are requested newest-first and followed through the ``next``
        URL of each response until none is returned.
        """
        next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
        for page_num in itertools.count(1):
            episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
            for episode in episodes['results']:
                metadata = self._extract_video_metadata(episode)
                # Smuggle the full API id so NebulaIE can skip its metadata request
                yield self.url_result(smuggle_url(
                    episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
            next_url = episodes.get('next')
            if not next_url:
                break

    def _generate_class_entries(self, channel):
        """Yield url_transparent entries for every lesson embedded in a class response."""
        for lesson in channel['lessons']:
            metadata = self._extract_video_metadata(lesson)
            # The fallback URL must be built from the raw lesson dict:
            # _extract_video_metadata() exposes the lesson slug as "display_id"
            # and never returns "class_slug"/"slug" keys, so indexing the
            # metadata dict here raised KeyError whenever share_url was missing.
            lesson_url = (
                lesson.get('share_url')
                or f'https://nebula.tv/{lesson["class_slug"]}/{lesson["slug"]}')
            yield self.url_result(
                smuggle_url(lesson_url, {'id': lesson['id']}),
                NebulaClassIE, url_transparent=True, **metadata)

    def _real_extract(self, url):
        """Return a playlist for the channel or class identified by the URL slug."""
        collection_slug = self._match_id(url)
        channel = self._call_api(
            f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
            collection_slug, note='Retrieving channel')

        # Classes carry their lessons inline; regular channels are paginated separately
        if channel.get('type') == 'class':
            entries = self._generate_class_entries(channel)
        else:
            entries = self._generate_playlist_entries(channel['id'], collection_slug)

        return self.playlist_result(
            entries=entries,
            playlist_id=collection_slug,
            playlist_title=channel.get('title'),
            playlist_description=channel.get('description'))
Commit	Line	Data
359df0fc	1	import itertools
bdc196a4	2	import json
bdc196a4	3
359df0fc	4	from .common import InfoExtractor
3d2623a8	5	from ..networking.exceptions import HTTPError
45d82be6	6	from ..utils import (
	7	ExtractorError,
	8	int_or_none,
	9	make_archive_id,
	10	parse_iso8601,
	11	smuggle_url,
	12	try_call,
	13	unsmuggle_url,
	14	update_url_query,
	15	url_or_none,
	16	urljoin,
	17	)
	18	from ..utils.traversal import traverse_obj
359df0fc	19
cbfe2e5c	20	_BASE_URL_RE = r'https?://(?:www\.\|beta\.)?(?:watchnebula\.com\|nebula\.app\|nebula\.tv)'
4cca2eb1	21
359df0fc HH	22
	23	class NebulaBaseIE(InfoExtractor):
	24	_NETRC_MACHINE = 'watchnebula'
45d82be6	25	_token = _api_token = None
359df0fc	26
45d82be6	27	def _perform_login(self, username, password):
	28	try:
	29	response = self._download_json(
	30	'https://nebula.tv/auth/login/', None,
	31	'Logging in to Nebula', 'Login failed',
	32	data=json.dumps({'email': username, 'password': password}).encode(),
	33	headers={'content-type': 'application/json'})
	34	except ExtractorError as e:
	35	if isinstance(e.cause, HTTPError) and e.cause.status == 400:
	36	raise ExtractorError('Login failed: Invalid username or password', expected=True)
	37	raise
	38	self._api_token = traverse_obj(response, ('key', {str}))
	39	if not self._api_token:
	40	raise ExtractorError('Login failed: No token')
359df0fc	41
45d82be6	42	def _call_api(self, args, *kwargs):
	43	if self._token:
	44	kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
359df0fc	45	try:
45d82be6	46	return self._download_json(args, *kwargs)
	47	except ExtractorError as e:
	48	if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
359df0fc	49	raise
45d82be6	50	self.to_screen(
	51	f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
	52	self._real_initialize()
	53	if self._token:
	54	kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
	55	return self._download_json(args, *kwargs)
359df0fc	56
45d82be6	57	def _real_initialize(self):
	58	if not self._api_token:
	59	self._api_token = try_call(
	60	lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
	61	self._token = self._download_json(
	62	'https://users.api.nebula.app/api/v1/authorization/', None,
	63	headers={'Authorization': f'Token {self._api_token}'} if self._api_token else None,
	64	note='Authorizing to Nebula', data=b'')['token']
bdc196a4	65
45d82be6	66	def _extract_formats(self, content_id, slug):
	67	for retry in (False, True):
	68	try:
	69	fmts, subs = self._extract_m3u8_formats_and_subtitles(
	70	f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8',
	71	slug, 'mp4', query={
	72	'token': self._token,
	73	'app_version': '23.10.0',
	74	'platform': 'ios',
	75	})
	76	return {'formats': fmts, 'subtitles': subs}
	77	except ExtractorError as e:
	78	if isinstance(e.cause, HTTPError) and e.cause.status == 401:
	79	self.raise_login_required()
	80	if not retry and isinstance(e.cause, HTTPError) and e.cause.status == 403:
	81	self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
	82	self._real_initialize()
	83	continue
	84	raise
359df0fc	85
45d82be6	86	def _extract_video_metadata(self, episode):
	87	channel_url = traverse_obj(
	88	episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
359df0fc	89	return {
45d82be6	90	'id': episode['id'].partition(':')[2],
	91	**traverse_obj(episode, {
	92	'display_id': 'slug',
	93	'title': 'title',
	94	'description': 'description',
	95	'timestamp': ('published_at', {parse_iso8601}),
	96	'duration': ('duration', {int_or_none}),
	97	'channel_id': 'channel_slug',
	98	'uploader_id': 'channel_slug',
	99	'channel': 'channel_title',
	100	'uploader': 'channel_title',
	101	'series': 'channel_title',
	102	'creator': 'channel_title',
	103	'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
	104	'episode_number': ('order', {int_or_none}),
	105	# Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
	106	'_old_archive_ids': ('zype_id', {lambda x: [
	107	make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
	108	}),
	109	'channel_url': channel_url,
	110	'uploader_url': channel_url,
359df0fc HH	111	}
359df0fc HH	112
359df0fc HH	113
359df0fc HH	114	class NebulaIE(NebulaBaseIE):
4cca2eb1	115	_VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
45d82be6	116	_TESTS = [{
	117	'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
	118	'info_dict': {
	119	'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
	120	'ext': 'mp4',
	121	'title': 'That Time Disney Remade Beauty and the Beast',
	122	'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
	123	'upload_date': '20180731',
	124	'timestamp': 1533009600,
	125	'channel': 'Lindsay Ellis',
	126	'channel_id': 'lindsayellis',
	127	'uploader': 'Lindsay Ellis',
	128	'uploader_id': 'lindsayellis',
	129	'uploader_url': r're:https://nebula\.(tv\|app)/lindsayellis',
	130	'series': 'Lindsay Ellis',
	131	'display_id': 'that-time-disney-remade-beauty-and-the-beast',
	132	'channel_url': r're:https://nebula\.(tv\|app)/lindsayellis',
	133	'creator': 'Lindsay Ellis',
	134	'duration': 2212,
	135	'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
	136	'_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
bdc196a4	137	},
45d82be6	138	'params': {'skip_download': 'm3u8'},
	139	}, {
	140	'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
	141	'md5': 'd05739cf6c38c09322422f696b569c23',
	142	'info_dict': {
	143	'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
	144	'ext': 'mp4',
	145	'title': 'Landing Craft - How The Allies Got Ashore',
	146	'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
	147	'upload_date': '20200327',
	148	'timestamp': 1585348140,
	149	'channel': 'Real Engineering — The Logistics of D-Day',
	150	'channel_id': 'd-day',
	151	'uploader': 'Real Engineering — The Logistics of D-Day',
	152	'uploader_id': 'd-day',
	153	'series': 'Real Engineering — The Logistics of D-Day',
	154	'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
	155	'creator': 'Real Engineering — The Logistics of D-Day',
	156	'duration': 841,
	157	'channel_url': 'https://nebula.tv/d-day',
	158	'uploader_url': 'https://nebula.tv/d-day',
	159	'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
	160	'_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
bdc196a4	161	},
45d82be6	162	'params': {'skip_download': 'm3u8'},
	163	}, {
	164	'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
	165	'md5': 'ebe28a7ad822b9ee172387d860487868',
	166	'info_dict': {
	167	'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
	168	'ext': 'mp4',
	169	'title': 'Episode 1: The Draw',
	170	'description': r'contains:There’s free money on offer… if the players can all work together.',
	171	'upload_date': '20200323',
	172	'timestamp': 1584980400,
	173	'channel': 'Tom Scott Presents: Money',
	174	'channel_id': 'tom-scott-presents-money',
	175	'uploader': 'Tom Scott Presents: Money',
	176	'uploader_id': 'tom-scott-presents-money',
	177	'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
	178	'duration': 825,
	179	'channel_url': 'https://nebula.tv/tom-scott-presents-money',
	180	'series': 'Tom Scott Presents: Money',
	181	'display_id': 'money-episode-1-the-draw',
	182	'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
	183	'creator': 'Tom Scott Presents: Money',
	184	'_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
bdc196a4	185	},
45d82be6	186	'params': {'skip_download': 'm3u8'},
	187	}, {
	188	'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
	189	'only_matching': True,
	190	}, {
	191	'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
	192	'info_dict': {
	193	'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
	194	'ext': 'mp4',
	195	'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
	196	'title': 'Did the US Really Blow Up the NordStream Pipelines?',
	197	'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
	198	'upload_date': '20230223',
	199	'timestamp': 1677144070,
	200	'channel': 'TLDR News EU',
	201	'channel_id': 'tldrnewseu',
	202	'uploader': 'TLDR News EU',
	203	'uploader_id': 'tldrnewseu',
	204	'uploader_url': r're:https://nebula\.(tv\|app)/tldrnewseu',
	205	'duration': 524,
	206	'channel_url': r're:https://nebula\.(tv\|app)/tldrnewseu',
	207	'series': 'TLDR News EU',
	208	'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
	209	'creator': 'TLDR News EU',
	210	'_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
cbfe2e5c	211	},
45d82be6	212	'params': {'skip_download': 'm3u8'},
	213	}, {
	214	'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
	215	'only_matching': True,
	216	}]
bdc196a4	217
359df0fc HH	218	def _real_extract(self, url):
359df0fc HH	219	slug = self._match_id(url)
45d82be6	220	url, smuggled_data = unsmuggle_url(url, {})
	221	if smuggled_data.get('id'):
	222	return {
	223	'id': smuggled_data['id'],
	224	'display_id': slug,
	225	'title': '',
	226	**self._extract_formats(smuggled_data['id'], slug),
	227	}
	228
	229	metadata = self._call_api(
	230	f'https://content.api.nebula.app/content/videos/{slug}',
	231	slug, note='Fetching video metadata')
	232	return {
	233	**self._extract_video_metadata(metadata),
	234	**self._extract_formats(metadata['id'], slug),
	235	}
	236
	237
	238	class NebulaClassIE(NebulaBaseIE):
	239	IE_NAME = 'nebula:class'
	240	_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
	241	_TESTS = [{
	242	'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
	243	'info_dict': {
	244	'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
	245	'ext': 'mp4',
	246	'display_id': '14',
	247	'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
	248	'episode_number': 14,
	249	'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
	250	'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
	251	'duration': 646,
	252	'episode': 'Episode 14',
	253	'title': 'Photos, Sculpture, and Video',
	254	},
	255	'params': {'skip_download': 'm3u8'},
	256	}]
	257
	258	def _real_extract(self, url):
	259	slug, episode = self._match_valid_url(url).group('id', 'ep')
	260	url, smuggled_data = unsmuggle_url(url, {})
	261	if smuggled_data.get('id'):
	262	return {
	263	'id': smuggled_data['id'],
	264	'display_id': slug,
	265	'title': '',
	266	**self._extract_formats(smuggled_data['id'], slug),
	267	}
	268
	269	metadata = self._call_api(
	270	f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
	271	slug, note='Fetching video metadata')
	272	return {
	273	**self._extract_video_metadata(metadata),
	274	**self._extract_formats(metadata['id'], slug),
	275	}
bdc196a4	276
bdc196a4	277
f3b3fe16 HH	278	class NebulaSubscriptionsIE(NebulaBaseIE):
f3b3fe16 HH	279	IE_NAME = 'nebula:subscriptions'
45d82be6	280	_VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows\|library/latest-videos)'
	281	_TESTS = [{
	282	'url': 'https://nebula.tv/myshows',
	283	'playlist_mincount': 1,
	284	'info_dict': {
	285	'id': 'myshows',
f3b3fe16	286	},
45d82be6	287	}]
f3b3fe16 HH	288
f3b3fe16 HH	289	def _generate_playlist_entries(self):
45d82be6	290	next_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
	291	'following': 'true',
	292	'include': 'engagement',
	293	'ordering': '-published_at',
	294	})
	295	for page_num in itertools.count(1):
	296	channel = self._call_api(
	297	next_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
f3b3fe16	298	for episode in channel['results']:
45d82be6	299	metadata = self._extract_video_metadata(episode)
	300	yield self.url_result(smuggle_url(
	301	f'https://nebula.tv/videos/{metadata["display_id"]}',
	302	{'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
	303	next_url = channel.get('next')
	304	if not next_url:
	305	return
f3b3fe16 HH	306
	307	def _real_extract(self, url):
	308	return self.playlist_result(self._generate_playlist_entries(), 'myshows')
	309
	310
	311	class NebulaChannelIE(NebulaBaseIE):
	312	IE_NAME = 'nebula:channel'
45d82be6	313	_VALID_URL = rf'{_BASE_URL_RE}/(?!myshows\|library\|videos/)(?P<id>[-\w]+)/?(?:$\|[?#])'
	314	_TESTS = [{
	315	'url': 'https://nebula.tv/tom-scott-presents-money',
	316	'info_dict': {
	317	'id': 'tom-scott-presents-money',
	318	'title': 'Tom Scott Presents: Money',
	319	'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
	320	},
	321	'playlist_count': 5,
	322	}, {
	323	'url': 'https://nebula.tv/lindsayellis',
	324	'info_dict': {
	325	'id': 'lindsayellis',
	326	'title': 'Lindsay Ellis',
	327	'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
	328	},
	329	'playlist_mincount': 2,
	330	}, {
	331	'url': 'https://nebula.tv/johnnyharris',
	332	'info_dict': {
	333	'id': 'johnnyharris',
	334	'title': 'Johnny Harris',
	335	'description': 'I make videos about maps and many other things.',
359df0fc	336	},
45d82be6	337	'playlist_mincount': 90,
	338	}, {
	339	'url': 'https://nebula.tv/copyright-for-fun-and-profit',
	340	'info_dict': {
	341	'id': 'copyright-for-fun-and-profit',
	342	'title': 'Copyright for Fun and Profit',
	343	'description': 'md5:6690248223eed044a9f11cd5a24f9742',
	344	},
	345	'playlist_count': 23,
	346	}]
bdc196a4	347
45d82be6	348	def _generate_playlist_entries(self, collection_id, collection_slug):
	349	next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
	350	for page_num in itertools.count(1):
	351	episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
	352	for episode in episodes['results']:
	353	metadata = self._extract_video_metadata(episode)
	354	yield self.url_result(smuggle_url(
	355	episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
	356	{'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
	357	next_url = episodes.get('next')
359df0fc HH	358	if not next_url:
359df0fc HH	359	break
45d82be6	360
	361	def _generate_class_entries(self, channel):
	362	for lesson in channel['lessons']:
	363	metadata = self._extract_video_metadata(lesson)
	364	yield self.url_result(smuggle_url(
	365	lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
	366	{'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)
bdc196a4 GS	367
bdc196a4 GS	368	def _real_extract(self, url):
45d82be6	369	collection_slug = self._match_id(url)
	370	channel = self._call_api(
	371	f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
	372	collection_slug, note='Retrieving channel')
	373
	374	if channel.get('type') == 'class':
	375	entries = self._generate_class_entries(channel)
	376	else:
	377	entries = self._generate_playlist_entries(channel['id'], collection_slug)
bdc196a4	378
359df0fc	379	return self.playlist_result(
45d82be6	380	entries=entries,
	381	playlist_id=collection_slug,
	382	playlist_title=channel.get('title'),
	383	playlist_description=channel.get('description'))