yt_dlp/extractor/fox.py

   1 import json
   2 import uuid
   3
   4 from .common import InfoExtractor
   5 from ..compat import (
   6     compat_str,
   7     compat_urllib_parse_unquote,
   8 )
   9 from ..networking.exceptions import HTTPError
  10 from ..utils import (
  11     ExtractorError,
  12     int_or_none,
  13     parse_age_limit,
  14     parse_duration,
  15     traverse_obj,
  16     try_get,
  17     unified_timestamp,
  18     url_or_none,
  19 )
  20
  21
  22 class FOXIE(InfoExtractor):
  23     _VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)'
  24     _TESTS = [{
  25         # clip
  26         'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
  27         'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
  28         'info_dict': {
  29             'id': '4b765a60490325103ea69888fb2bd4e8',
  30             'ext': 'mp4',
  31             'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
  32             'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
  33             'duration': 102,
  34             'timestamp': 1504291893,
  35             'upload_date': '20170901',
  36             'creator': 'FOX',
  37             'series': 'Gotham',
  38             'age_limit': 14,
  39             'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
  40             'thumbnail': r're:^https?://.*\.jpg$',
  41         },
  42         'params': {
  43             'skip_download': True,
  44         },
  45     }, {
  46         # episode, geo-restricted
  47         'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
  48         'only_matching': True,
  49     }, {
  50         # sports event, geo-restricted
  51         'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
  52         'only_matching': True,
  53     }, {
  54         # fox sports replay, geo-restricted
  55         'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89',
  56         'only_matching': True,
  57     }]
  58     _GEO_BYPASS = False
  59     _HOME_PAGE_URL = 'https://www.fox.com/'
  60     _API_KEY = '6E9S4bmcoNnZwVLOHywOv8PJEdu76cM9'
  61     _access_token = None
  62     _device_id = compat_str(uuid.uuid4())
  63
  64     def _call_api(self, path, video_id, data=None):
  65         headers = {
  66             'X-Api-Key': self._API_KEY,
  67         }
  68         if self._access_token:
  69             headers['Authorization'] = 'Bearer ' + self._access_token
  70         try:
  71             return self._download_json(
  72                 'https://api3.fox.com/v2.0/' + path,
  73                 video_id, data=data, headers=headers)
  74         except ExtractorError as e:
  75             if isinstance(e.cause, HTTPError) and e.cause.status == 403:
  76                 entitlement_issues = self._parse_json(
  77                     e.cause.response.read().decode(), video_id)['entitlementIssues']
  78                 for e in entitlement_issues:
  79                     if e.get('errorCode') == 1005:
  80                         raise ExtractorError(
  81                             'This video is only available via cable service provider '
  82                             'subscription. You may want to use --cookies.', expected=True)
  83                 messages = ', '.join([e['message'] for e in entitlement_issues])
  84                 raise ExtractorError(messages, expected=True)
  85             raise
  86
  87     def _real_initialize(self):
  88         if not self._access_token:
  89             mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
  90             if mvpd_auth:
  91                 self._access_token = (self._parse_json(compat_urllib_parse_unquote(
  92                     mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
  93             if not self._access_token:
  94                 self._access_token = self._call_api(
  95                     'login', None, json.dumps({
  96                         'deviceId': self._device_id,
  97                     }).encode())['accessToken']
  98
  99     def _real_extract(self, url):
 100         video_id = self._match_id(url)
 101
 102         self._access_token = self._call_api(
 103             'previewpassmvpd?device_id=%s&mvpd_id=TempPass_fbcfox_60min' % self._device_id,
 104             video_id)['accessToken']
 105
 106         video = self._call_api('watch', video_id, data=json.dumps({
 107             'capabilities': ['drm/widevine', 'fsdk/yo'],
 108             'deviceWidth': 1280,
 109             'deviceHeight': 720,
 110             'maxRes': '720p',
 111             'os': 'macos',
 112             'osv': '',
 113             'provider': {
 114                 'freewheel': {'did': self._device_id},
 115                 'vdms': {'rays': ''},
 116                 'dmp': {'kuid': '', 'seg': ''}
 117             },
 118             'playlist': '',
 119             'privacy': {'us': '1---'},
 120             'siteSection': '',
 121             'streamType': 'vod',
 122             'streamId': video_id}).encode('utf-8'))
 123
 124         title = video['name']
 125         release_url = video['url']
 126
 127         try:
 128             m3u8_url = self._download_json(release_url, video_id)['playURL']
 129         except ExtractorError as e:
 130             if isinstance(e.cause, HTTPError) and e.cause.status == 403:
 131                 error = self._parse_json(e.cause.response.read().decode(), video_id)
 132                 if error.get('exception') == 'GeoLocationBlocked':
 133                     self.raise_geo_restricted(countries=['US'])
 134                 raise ExtractorError(error['description'], expected=True)
 135             raise
 136         formats = self._extract_m3u8_formats(
 137             m3u8_url, video_id, 'mp4',
 138             entry_protocol='m3u8_native', m3u8_id='hls')
 139
 140         data = try_get(
 141             video, lambda x: x['trackingData']['properties'], dict) or {}
 142
 143         duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
 144             video.get('duration')) or parse_duration(video.get('duration'))
 145         timestamp = unified_timestamp(video.get('datePublished'))
 146         creator = data.get('brand') or data.get('network') or video.get('network')
 147         series = video.get('seriesName') or data.get(
 148             'seriesName') or data.get('show')
 149
 150         subtitles = {}
 151         for doc_rel in video.get('documentReleases', []):
 152             rel_url = doc_rel.get('url')
 153             if not url or doc_rel.get('format') != 'SCC':
 154                 continue
 155             subtitles['en'] = [{
 156                 'url': rel_url,
 157                 'ext': 'scc',
 158             }]
 159             break
 160
 161         return {
 162             'id': video_id,
 163             'title': title,
 164             'formats': formats,
 165             'description': video.get('description'),
 166             'duration': duration,
 167             'timestamp': timestamp,
 168             'age_limit': parse_age_limit(video.get('contentRating')),
 169             'creator': creator,
 170             'series': series,
 171             'season_number': int_or_none(video.get('seasonNumber')),
 172             'episode': video.get('name'),
 173             'episode_number': int_or_none(video.get('episodeNumber')),
 174             'thumbnail': traverse_obj(video, ('images', 'still', 'raw'), expected_type=url_or_none),
 175             'release_year': int_or_none(video.get('releaseYear')),
 176             'subtitles': subtitles,
 177         }