yt_dlp/extractor/radlive.py

   1 import json
   2
   3 from ..utils import ExtractorError, traverse_obj, try_get, unified_timestamp
   4 from .common import InfoExtractor
   5
   6
   7 class RadLiveIE(InfoExtractor):
   8     IE_NAME = 'radlive'
   9     _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)'
  10     _TESTS = [{
  11         'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a',
  12         'md5': '6219d5d31d52de87d21c9cf5b7cb27ff',
  13         'info_dict': {
  14             'id': 'dc5acfbc-761b-4bec-9564-df999905116a',
  15             'ext': 'mp4',
  16             'title': 'Deathpact - Digital Mirage 2 [Full Set]',
  17             'language': 'en',
  18             'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png',
  19             'description': '',
  20             'release_timestamp': 1600185600.0,
  21             'channel': 'Proximity',
  22             'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009',
  23             'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009',
  24         }
  25     }, {
  26         'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
  27         'md5': '40b2175f347592125d93e9a344080125',
  28         'info_dict': {
  29             'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
  30             'ext': 'mp4',
  31             'title': 'E01: Bad Jokes 1',
  32             'language': 'en',
  33             'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg',
  34             'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype',
  35             'release_timestamp': None,
  36             'channel': None,
  37             'channel_id': None,
  38             'channel_url': None,
  39             'episode': 'E01: Bad Jokes 1',
  40             'episode_number': 1,
  41             'episode_id': '336',
  42         },
  43     }]
  44
  45     def _real_extract(self, url):
  46         content_type, video_id = self._match_valid_url(url).groups()
  47
  48         webpage = self._download_webpage(url, video_id)
  49
  50         content_info = json.loads(self._search_regex(
  51             r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
  52             webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
  53         video_info = content_info[content_type]
  54
  55         if not video_info:
  56             raise ExtractorError('Unable to extract video info, make sure the URL is valid')
  57
  58         formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
  59         self._sort_formats(formats)
  60
  61         data = video_info.get('structured_data', {})
  62
  63         release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate')))
  64         channel = next(iter(content_info.get('channels', [])), {})
  65         channel_id = channel.get('lrn', '').split(':')[-1] or None
  66
  67         result = {
  68             'id': video_id,
  69             'title': video_info['title'],
  70             'formats': formats,
  71             'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')),
  72             'thumbnail': traverse_obj(data, ('image', 'contentUrl')),
  73             'description': data.get('description'),
  74             'release_timestamp': release_date,
  75             'channel': channel.get('name'),
  76             'channel_id': channel_id,
  77             'channel_url': f'https://rad.live/content/channel/{channel_id}' if channel_id else None,
  78
  79         }
  80         if content_type == 'episode':
  81             result.update({
  82                 # TODO: Get season number when downloading single episode
  83                 'episode': video_info.get('title'),
  84                 'episode_number': video_info.get('number'),
  85                 'episode_id': video_info.get('id'),
  86             })
  87
  88         return result
  89
  90
  91 class RadLiveSeasonIE(RadLiveIE):
  92     IE_NAME = 'radlive:season'
  93     _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)'
  94     _TESTS = [{
  95         'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75',
  96         'md5': '40b2175f347592125d93e9a344080125',
  97         'info_dict': {
  98             'id': '08a290f7-c9ef-4e22-9105-c255995a2e75',
  99             'title': 'Bad Jokes - Season 1',
 100         },
 101         'playlist_mincount': 5,
 102     }]
 103
 104     @classmethod
 105     def suitable(cls, url):
 106         return False if RadLiveIE.suitable(url) else super(RadLiveSeasonIE, cls).suitable(url)
 107
 108     def _real_extract(self, url):
 109         season_id = self._match_id(url)
 110         webpage = self._download_webpage(url, season_id)
 111
 112         content_info = json.loads(self._search_regex(
 113             r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
 114             webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
 115         video_info = content_info['season']
 116
 117         entries = [{
 118             '_type': 'url_transparent',
 119             'id': episode['structured_data']['url'].split('/')[-1],
 120             'url': episode['structured_data']['url'],
 121             'series': try_get(content_info, lambda x: x['series']['title']),
 122             'season': video_info['title'],
 123             'season_number': video_info.get('number'),
 124             'season_id': video_info.get('id'),
 125             'ie_key': RadLiveIE.ie_key(),
 126         } for episode in video_info['episodes']]
 127
 128         return self.playlist_result(entries, season_id, video_info.get('title'))
 129
 130
 131 class RadLiveChannelIE(RadLiveIE):
 132     IE_NAME = 'radlive:channel'
 133     _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)'
 134     _TESTS = [{
 135         'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274',
 136         'md5': '625156a08b7f2b0b849f234e664457ac',
 137         'info_dict': {
 138             'id': '5c4d8df4-6fa0-413c-81e3-873479b49274',
 139             'title': 'Whistle Sports',
 140         },
 141         'playlist_mincount': 7,
 142     }]
 143
 144     _QUERY = '''
 145 query WebChannelListing ($lrn: ID!) {
 146   channel (id:$lrn) {
 147     name
 148     features {
 149       structured_data
 150     }
 151   }
 152 }'''
 153
 154     @classmethod
 155     def suitable(cls, url):
 156         return False if RadLiveIE.suitable(url) else super(RadLiveChannelIE, cls).suitable(url)
 157
 158     def _real_extract(self, url):
 159         channel_id = self._match_id(url)
 160
 161         graphql = self._download_json(
 162             'https://content.mhq.12core.net/graphql', channel_id,
 163             headers={'Content-Type': 'application/json'},
 164             data=json.dumps({
 165                 'query': self._QUERY,
 166                 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'}
 167             }).encode('utf-8'))
 168
 169         data = traverse_obj(graphql, ('data', 'channel'))
 170         if not data:
 171             raise ExtractorError('Unable to extract video info, make sure the URL is valid')
 172
 173         entries = [{
 174             '_type': 'url_transparent',
 175             'url': feature['structured_data']['url'],
 176             'ie_key': RadLiveIE.ie_key(),
 177         } for feature in data['features']]
 178
 179         return self.playlist_result(entries, channel_id, data.get('name'))