yt_dlp/extractor/teamcoco.py

   1 import json
   2 import re
   3
   4 from .turner import TurnerBaseIE
   5 from ..utils import (
   6     ExtractorError,
   7     clean_html,
   8     determine_ext,
   9     make_archive_id,
  10     merge_dicts,
  11     mimetype2ext,
  12     parse_duration,
  13     parse_qs,
  14     traverse_obj,
  15     unified_timestamp,
  16     url_or_none,
  17     urljoin,
  18 )
  19
  20
  21 class TeamcocoBaseIE(TurnerBaseIE):
  22     _QUALITIES = {
  23         'low': (480, 272),
  24         'sd': (640, 360),
  25         'hd': (1280, 720),
  26         'uhd': (1920, 1080),
  27     }
  28
  29     def _get_formats_and_subtitles(self, info, video_id):
  30         formats, subtitles = [], {}
  31
  32         for src in traverse_obj(info, ('src', ..., {dict})):
  33             format_id = src.get('label')
  34             src_url = src.get('src')
  35             if re.match(r'https?:/[^/]', src_url):
  36                 src_url = src_url.replace(':/', '://', 1)
  37             ext = determine_ext(src_url, mimetype2ext(src.get('type')))
  38
  39             if not format_id or not src_url:
  40                 continue
  41             elif format_id == 'hls' or ext == 'm3u8':
  42                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
  43                     src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
  44                 formats.extend(fmts)
  45                 self._merge_subtitles(subs, target=subtitles)
  46
  47             elif format_id in self._QUALITIES:
  48                 if src_url.startswith('/mp4:protected/'):
  49                     # TODO: Correct extraction for these files
  50                     continue
  51                 formats.append({
  52                     'url': src_url,
  53                     'ext': ext,
  54                     'format_id': format_id,
  55                     'width': self._QUALITIES[format_id][0],
  56                     'height': self._QUALITIES[format_id][1],
  57                 })
  58
  59         return formats, subtitles
  60
  61
  62 class TeamcocoIE(TeamcocoBaseIE):
  63     _VALID_URL = r'https?://(?:www\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
  64     _TESTS = [
  65         {
  66             'url': 'http://teamcoco.com/video/mary-kay-remote',
  67             'info_dict': {
  68                 'id': '80187',
  69                 'display_id': 'video_mary-kay-remote',
  70                 'ext': 'mp4',
  71                 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
  72                 'description': 'md5:9fb64e45b5aef6b2af1b67612b36c162',
  73                 'thumbnail': 'https://teamcoco.com/image/thumb?id=80187',
  74                 'upload_date': '20140402',
  75                 'timestamp': 1396440000,
  76             },
  77             'params': {
  78                 'skip_download': 'm3u8',
  79             },
  80         }, {
  81             'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
  82             'info_dict': {
  83                 'id': '19705',
  84                 'display_id': 'video_louis-ck-interview-george-w-bush',
  85                 'ext': 'mp4',
  86                 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
  87                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
  88                 'thumbnail': 'https://teamcoco.com/image/thumb?id=19705',
  89                 'upload_date': '20111104',
  90                 'timestamp': 1320408000,
  91             },
  92             'params': {
  93                 'skip_download': 'm3u8',
  94             },
  95         }, {
  96             'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
  97             'info_dict': {
  98                 'id': '88748',
  99                 'display_id': 'video_timothy-olyphant-drinking-whiskey',
 100                 'ext': 'mp4',
 101                 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
 102                 'description': 'md5:15501f23f020e793aeca761205e42c24',
 103                 'upload_date': '20150415',
 104                 'timestamp': 1429099200,
 105                 'thumbnail': 'https://teamcoco.com/image/thumb?id=88748',
 106             },
 107         }, {
 108             'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
 109             'info_dict': {
 110                 'id': '89341',
 111                 'ext': 'mp4',
 112                 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
 113                 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
 114             },
 115             'skip': 'This video is no longer available.',
 116         }, {
 117             'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
 118             'only_matching': True,
 119         }, {
 120             'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
 121             'only_matching': True,
 122         }, {
 123             'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
 124             'only_matching': True,
 125         }, {
 126             'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
 127             'only_matching': True,
 128         },
 129     ]
 130
 131     def _real_extract(self, url):
 132         display_id = self._match_id(url).replace('/', '_')
 133         webpage = self._download_webpage(url, display_id)
 134         data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']
 135         info = merge_dicts(*traverse_obj(data, (
 136             'blocks', lambda _, v: v['name'] in ('meta-tags', 'video-player', 'video-info'), 'props', {dict})))
 137
 138         thumbnail = traverse_obj(
 139             info, (('image', 'poster'), {lambda x: urljoin('https://teamcoco.com/', x)}), get_all=False)
 140         video_id = traverse_obj(parse_qs(thumbnail), ('id', 0)) or display_id
 141
 142         formats, subtitles = self._get_formats_and_subtitles(info, video_id)
 143
 144         return {
 145             'id': video_id,
 146             'display_id': display_id,
 147             'formats': formats,
 148             'subtitles': subtitles,
 149             'thumbnail': thumbnail,
 150             **traverse_obj(info, {
 151                 'title': 'title',
 152                 'description': (('descriptionHtml', 'description'), {clean_html}),
 153                 'timestamp': ('publishedOn', {lambda x: f'{x} 12:00AM'}, {unified_timestamp}),
 154             }, get_all=False),
 155         }
 156
 157
 158 class ConanClassicIE(TeamcocoBaseIE):
 159     _VALID_URL = r'https?://(?:(?:www\.)?conanclassic|conan25\.teamcoco)\.com/(?P<id>([^/]+/)*[^/?#]+)'
 160     _TESTS = [{
 161         'url': 'https://conanclassic.com/video/ice-cube-kevin-hart-conan-share-lyft',
 162         'info_dict': {
 163             'id': '74709',
 164             'ext': 'mp4',
 165             'title': 'Ice Cube, Kevin Hart, & Conan Share A Lyft Car',
 166             'display_id': 'video/ice-cube-kevin-hart-conan-share-lyft',
 167             'description': 'The stars of "Ride Along" teach Conan how to roll around Hollywood.',
 168             'thumbnail': 'http://cdn.teamcococdn.com/image/640x360/lyft-5bd75f82b616c.png',
 169             'duration': 570.0,
 170             'upload_date': '20131211',
 171             'timestamp': 1386721620,
 172             '_old_archive_ids': ['teamcoco 74709'],
 173         },
 174         'params': {'skip_download': 'm3u8'},
 175     }, {
 176         'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
 177         'only_matching': True,
 178     }]
 179
 180     _GRAPHQL_QUERY = '''query find($id: ID!) {
 181   findRecord(id: $id) {
 182
 183 ... on MetaInterface {
 184   id
 185   title
 186   teaser
 187   publishOn
 188   slug
 189   thumb {
 190
 191 ... on FileInterface {
 192   id
 193   path
 194   preview
 195   mime
 196 }
 197
 198   }
 199 }
 200
 201 ... on Video {
 202   videoType
 203   duration
 204   isLive
 205   youtubeId
 206   turnerMediaId
 207   turnerMediaAuthToken
 208   airDate
 209 }
 210
 211 ... on Episode {
 212   airDate
 213   seasonNumber
 214   episodeNumber
 215   guestNames
 216 }
 217
 218   }
 219   findRecordVideoMetadata(id: $id) {
 220     turnerMediaId
 221     turnerMediaAuthToken
 222     duration
 223     src
 224   }
 225 }'''
 226
 227     def _real_extract(self, url):
 228         display_id = self._match_id(url)
 229         webpage = self._download_webpage(url, display_id)
 230         data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['pageData']
 231         video_id = traverse_obj(
 232             data, ('blocks', ..., 'props', 'fieldDefs', lambda _, v: v['name'] == 'incomingVideoId', 'value'),
 233             ('blocks', ..., 'props', 'fields', 'incomingVideoRecord', 'id'), get_all=False)
 234         if not video_id:
 235             self.raise_no_formats('Unable to extract video ID from webpage', expected=True)
 236
 237         response = self._download_json(
 238             'https://conanclassic.com/api/legacy/graphql', video_id, data=json.dumps({
 239                 'query': self._GRAPHQL_QUERY,
 240                 'variables': {'id': video_id},
 241             }, separators=(',', ':')).encode(), headers={
 242                 'Content-Type': 'application/json',
 243             })
 244
 245         info = traverse_obj(response, ('data', 'findRecord', {
 246             'title': 'title',
 247             'description': 'teaser',
 248             'thumbnail': ('thumb', 'preview', {url_or_none}),
 249             'duration': ('duration', {parse_duration}),
 250             'timestamp': ('publishOn', {unified_timestamp}),
 251         }))
 252
 253         media_id = traverse_obj(
 254             response, ('data', ('findRecord', 'findRecordVideoMetadata'), 'turnerMediaId'), get_all=False)
 255         if media_id:
 256             token = traverse_obj(
 257                 response, ('data', ('findRecord', 'findRecordVideoMetadata'), 'turnerMediaAuthToken'), get_all=False)
 258             if not token:
 259                 raise ExtractorError('No Turner Media auth token found in API response')
 260             self._initialize_geo_bypass({
 261                 'countries': ['US'],
 262             })
 263             info.update(self._extract_ngtv_info(media_id, {
 264                 'accessToken': token,
 265                 'accessTokenType': 'jws',
 266             }))
 267         else:
 268             formats, subtitles = self._get_formats_and_subtitles(
 269                 traverse_obj(response, ('data', 'findRecordVideoMetadata')), video_id)
 270             info.update({
 271                 'formats': formats,
 272                 'subtitles': subtitles,
 273             })
 274
 275         return {
 276             'id': video_id,
 277             'display_id': display_id,
 278             '_old_archive_ids': [make_archive_id('Teamcoco', video_id)],
 279             **info,
 280         }