[yt-dlp.git] / yt_dlp / extractor / fox.py

import json
import uuid

from .common import InfoExtractor
from ..compat import (
    compat_HTTPError,
    compat_str,
    compat_urllib_parse_unquote,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_age_limit,
    parse_duration,
    traverse_obj,
    try_get,
    unified_timestamp,
    url_or_none,
)


class FOXIE(InfoExtractor):
    """Extractor for fox.com ``/watch/<hex id>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
    _TESTS = [{
        # clip
        'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
        'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
        'info_dict': {
            'id': '4b765a60490325103ea69888fb2bd4e8',
            'ext': 'mp4',
            'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
            'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
            'duration': 102,
            'timestamp': 1504291893,
            'upload_date': '20170901',
            'creator': 'FOX',
            'series': 'Gotham',
            'age_limit': 14,
            'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # episode, geo-restricted
        'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
        'only_matching': True,
    }, {
        # sports event, geo-restricted
        'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
        'only_matching': True,
    }]
    _GEO_BYPASS = False
    # Cookies for this URL are searched for an "mvpd-auth" entry
    _HOME_PAGE_URL = 'https://www.fox.com/'
    # Sent as the X-Api-Key header on every API request
    _API_KEY = '6E9S4bmcoNnZwVLOHywOv8PJEdu76cM9'
    # Bearer token; set by _real_initialize() and replaced in _real_extract()
    _access_token = None
    # Random identifier generated once, when the class body is executed
    _device_id = compat_str(uuid.uuid4())

    def _call_api(self, path, video_id, data=None):
        """Fetch JSON from ``https://api3.fox.com/v2.0/<path>``.

        ``data`` is forwarded to ``_download_json`` unchanged. The API key is
        always sent; an ``Authorization: Bearer`` header is added once an
        access token is available.

        Raises an expected ExtractorError when the API answers HTTP 403 with
        a list of entitlement issues. Every other failure is re-raised as is.
        """
        headers = {
            'X-Api-Key': self._API_KEY,
        }
        if self._access_token:
            headers['Authorization'] = 'Bearer ' + self._access_token
        try:
            return self._download_json(
                'https://api3.fox.com/v2.0/' + path,
                video_id, data=data, headers=headers)
        except ExtractorError as e:
            if not (isinstance(e.cause, compat_HTTPError) and e.cause.code == 403):
                raise
            # Parse leniently: an unexpected 403 body must not replace the
            # original HTTP error with an unrelated KeyError/parse error
            entitlement_issues = try_get(
                self._parse_json(e.cause.read().decode(), video_id, fatal=False),
                lambda x: x['entitlementIssues'], list)
            if not entitlement_issues:
                raise
            # "issue" rather than "e", so the caught exception is not rebound
            for issue in entitlement_issues:
                if issue.get('errorCode') == 1005:
                    raise ExtractorError(
                        'This video is only available via cable service provider '
                        'subscription. You may want to use --cookies.', expected=True)
            messages = ', '.join([issue['message'] for issue in entitlement_issues])
            raise ExtractorError(messages, expected=True)

    def _real_initialize(self):
        """Obtain an access token unless one is already set.

        The ``accessToken`` field of the URL-quoted JSON stored in the
        ``mvpd-auth`` cookie is tried first. If that yields nothing, the
        ``login`` API endpoint is called with the device id.
        """
        if not self._access_token:
            mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
            if mvpd_auth:
                self._access_token = (self._parse_json(compat_urllib_parse_unquote(
                    mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
            if not self._access_token:
                self._access_token = self._call_api(
                    'login', None, json.dumps({
                        'deviceId': self._device_id,
                    }).encode())['accessToken']

    def _real_extract(self, url):
        """Build the info dict for a single fox.com watch page.

        Requests a preview-pass token, asks the ``watch`` endpoint for the
        video description, resolves the release URL to an HLS playlist and
        collects metadata and SCC subtitles from the ``watch`` response.

        Raises a geo-restriction error when the release URL answers HTTP 403
        with ``GeoLocationBlocked``, and an expected ExtractorError for any
        other 403 from that URL.
        """
        video_id = self._match_id(url)

        # NOTE(review): this replaces whatever token _real_initialize() set,
        # including one taken from the mvpd-auth cookie - confirm intended
        self._access_token = self._call_api(
            'previewpassmvpd?device_id=%s&mvpd_id=TempPass_fbcfox_60min' % self._device_id,
            video_id)['accessToken']

        video = self._call_api('watch', video_id, data=json.dumps({
            'capabilities': ['drm/widevine', 'fsdk/yo'],
            'deviceWidth': 1280,
            'deviceHeight': 720,
            'maxRes': '720p',
            'os': 'macos',
            'osv': '',
            'provider': {
                'freewheel': {'did': self._device_id},
                'vdms': {'rays': ''},
                'dmp': {'kuid': '', 'seg': ''}
            },
            'playlist': '',
            'privacy': {'us': '1---'},
            'siteSection': '',
            'streamType': 'vod',
            'streamId': video_id}).encode('utf-8'))

        # Both fields are mandatory: a missing key fails the extraction here
        title = video['name']
        release_url = video['url']

        try:
            m3u8_url = self._download_json(release_url, video_id)['playURL']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                error = self._parse_json(e.cause.read().decode(), video_id)
                if error.get('exception') == 'GeoLocationBlocked':
                    self.raise_geo_restricted(countries=['US'])
                raise ExtractorError(error['description'], expected=True)
            raise
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        # Secondary metadata source used as a fallback for creator/series
        data = try_get(
            video, lambda x: x['trackingData']['properties'], dict) or {}

        duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
            video.get('duration')) or parse_duration(video.get('duration'))
        timestamp = unified_timestamp(video.get('datePublished'))
        creator = data.get('brand') or data.get('network') or video.get('network')
        series = video.get('seriesName') or data.get(
            'seriesName') or data.get('show')

        subtitles = {}
        # "or []" also covers an explicit null value for the key
        for doc_rel in video.get('documentReleases') or []:
            rel_url = doc_rel.get('url')
            # Test the release's own URL, not the page URL (always truthy)
            if not rel_url or doc_rel.get('format') != 'SCC':
                continue
            # Only the first usable SCC release is kept
            subtitles['en'] = [{
                'url': rel_url,
                'ext': 'scc',
            }]
            break

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': video.get('description'),
            'duration': duration,
            'timestamp': timestamp,
            'age_limit': parse_age_limit(video.get('contentRating')),
            'creator': creator,
            'series': series,
            'season_number': int_or_none(video.get('seasonNumber')),
            'episode': video.get('name'),
            'episode_number': int_or_none(video.get('episodeNumber')),
            'thumbnail': traverse_obj(video, ('images', 'still', 'raw'), expected_type=url_or_none),
            'release_year': int_or_none(video.get('releaseYear')),
            'subtitles': subtitles,
        }
Commit	Line	Data
41c2c254 RA	1	import json
41c2c254 RA	2	import uuid
96c186e1	3
443f8de8	4	from .common import InfoExtractor
6df196f3	5	from ..compat import (
0d08bcdb	6	compat_HTTPError,
6df196f3 RA	7	compat_str,
	8	compat_urllib_parse_unquote,
	9	)
e37b54b1	10	from ..utils import (
0d08bcdb	11	ExtractorError,
bf6ec2fe S	12	int_or_none,
	13	parse_age_limit,
	14	parse_duration,
42a44f01	15	traverse_obj,
bf6ec2fe S	16	try_get,
bf6ec2fe S	17	unified_timestamp,
42a44f01	18	url_or_none,
e37b54b1	19	)
9787c5f4	20
9787c5f4	21
443f8de8	22	class FOXIE(InfoExtractor):
6df196f3	23	_VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)'
bf6ec2fe S	24	_TESTS = [{
	25	# clip
	26	'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/',
5e3a6fec	27	'md5': 'ebd296fcc41dd4b19f8115d8461a3165',
9787c5f4	28	'info_dict': {
bf6ec2fe	29	'id': '4b765a60490325103ea69888fb2bd4e8',
9787c5f4	30	'ext': 'mp4',
bf6ec2fe S	31	'title': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
	32	'description': 'md5:549cd9c70d413adb32ce2a779b53b486',
	33	'duration': 102,
	34	'timestamp': 1504291893,
	35	'upload_date': '20170901',
	36	'creator': 'FOX',
	37	'series': 'Gotham',
6df196f3	38	'age_limit': 14,
42a44f01 VK	39	'episode': 'Aftermath: Bruce Wayne Develops Into The Dark Knight',
42a44f01 VK	40	'thumbnail': r're:^https?://.*\.jpg$',
9787c5f4	41	},
bf6ec2fe S	42	'params': {
	43	'skip_download': True,
	44	},
	45	}, {
	46	# episode, geo-restricted
	47	'url': 'https://www.fox.com/watch/087036ca7f33c8eb79b08152b4dd75c1/',
	48	'only_matching': True,
	49	}, {
443f8de8	50	# sports event, geo-restricted
443f8de8	51	'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/',
bf6ec2fe S	52	'only_matching': True,
bf6ec2fe S	53	}]
0d08bcdb	54	_GEO_BYPASS = False
6df196f3	55	_HOME_PAGE_URL = 'https://www.fox.com/'
443f8de8	56	_API_KEY = '6E9S4bmcoNnZwVLOHywOv8PJEdu76cM9'
41c2c254	57	_access_token = None
443f8de8	58	_device_id = compat_str(uuid.uuid4())
96c186e1	59
41c2c254 RA	60	def _call_api(self, path, video_id, data=None):
41c2c254 RA	61	headers = {
6df196f3	62	'X-Api-Key': self._API_KEY,
41c2c254 RA	63	}
	64	if self._access_token:
	65	headers['Authorization'] = 'Bearer ' + self._access_token
0d08bcdb RA	66	try:
0d08bcdb RA	67	return self._download_json(
443f8de8	68	'https://api3.fox.com/v2.0/' + path,
0d08bcdb RA	69	video_id, data=data, headers=headers)
0d08bcdb RA	70	except ExtractorError as e:
62d10f0d	71	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
0d08bcdb RA	72	entitlement_issues = self._parse_json(
	73	e.cause.read().decode(), video_id)['entitlementIssues']
	74	for e in entitlement_issues:
	75	if e.get('errorCode') == 1005:
	76	raise ExtractorError(
	77	'This video is only available via cable service provider '
	78	'subscription. You may want to use --cookies.', expected=True)
	79	messages = ', '.join([e['message'] for e in entitlement_issues])
	80	raise ExtractorError(messages, expected=True)
	81	raise
96c186e1	82
41c2c254	83	def _real_initialize(self):
6df196f3 RA	84	if not self._access_token:
	85	mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
	86	if mvpd_auth:
	87	self._access_token = (self._parse_json(compat_urllib_parse_unquote(
	88	mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
	89	if not self._access_token:
	90	self._access_token = self._call_api(
	91	'login', None, json.dumps({
443f8de8	92	'deviceId': self._device_id,
6df196f3	93	}).encode())['accessToken']
9787c5f4	94
	95	def _real_extract(self, url):
	96	video_id = self._match_id(url)
7aa0ee32	97
443f8de8	98	self._access_token = self._call_api(
	99	'previewpassmvpd?device_id=%s&mvpd_id=TempPass_fbcfox_60min' % self._device_id,
	100	video_id)['accessToken']
	101
	102	video = self._call_api('watch', video_id, data=json.dumps({
	103	'capabilities': ['drm/widevine', 'fsdk/yo'],
	104	'deviceWidth': 1280,
	105	'deviceHeight': 720,
	106	'maxRes': '720p',
	107	'os': 'macos',
	108	'osv': '',
	109	'provider': {
	110	'freewheel': {'did': self._device_id},
	111	'vdms': {'rays': ''},
	112	'dmp': {'kuid': '', 'seg': ''}
	113	},
	114	'playlist': '',
	115	'privacy': {'us': '1---'},
	116	'siteSection': '',
	117	'streamType': 'vod',
	118	'streamId': video_id}).encode('utf-8'))
bf6ec2fe S	119
bf6ec2fe S	120	title = video['name']
41c2c254	121	release_url = video['url']
443f8de8	122
0d08bcdb RA	123	try:
	124	m3u8_url = self._download_json(release_url, video_id)['playURL']
	125	except ExtractorError as e:
e0dde1d8	126	if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
0d08bcdb RA	127	error = self._parse_json(e.cause.read().decode(), video_id)
	128	if error.get('exception') == 'GeoLocationBlocked':
	129	self.raise_geo_restricted(countries=['US'])
	130	raise ExtractorError(error['description'], expected=True)
	131	raise
96c186e1 RA	132	formats = self._extract_m3u8_formats(
	133	m3u8_url, video_id, 'mp4',
	134	entry_protocol='m3u8_native', m3u8_id='hls')
96c186e1	135
6df196f3 RA	136	data = try_get(
	137	video, lambda x: x['trackingData']['properties'], dict) or {}
	138
96c186e1 RA	139	duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
	140	video.get('duration')) or parse_duration(video.get('duration'))
	141	timestamp = unified_timestamp(video.get('datePublished'))
	142	creator = data.get('brand') or data.get('network') or video.get('network')
	143	series = video.get('seriesName') or data.get(
	144	'seriesName') or data.get('show')
684ae102 RA	145
	146	subtitles = {}
	147	for doc_rel in video.get('documentReleases', []):
	148	rel_url = doc_rel.get('url')
	149	if not url or doc_rel.get('format') != 'SCC':
	150	continue
	151	subtitles['en'] = [{
	152	'url': rel_url,
	153	'ext': 'scc',
	154	}]
	155	break
bf6ec2fe	156
96c186e1	157	return {
bf6ec2fe S	158	'id': video_id,
bf6ec2fe S	159	'title': title,
96c186e1 RA	160	'formats': formats,
96c186e1 RA	161	'description': video.get('description'),
bf6ec2fe S	162	'duration': duration,
bf6ec2fe S	163	'timestamp': timestamp,
6df196f3	164	'age_limit': parse_age_limit(video.get('contentRating')),
bf6ec2fe S	165	'creator': creator,
bf6ec2fe S	166	'series': series,
96c186e1 RA	167	'season_number': int_or_none(video.get('seasonNumber')),
	168	'episode': video.get('name'),
	169	'episode_number': int_or_none(video.get('episodeNumber')),
42a44f01	170	'thumbnail': traverse_obj(video, ('images', 'still', 'raw'), expected_type=url_or_none),
96c186e1	171	'release_year': int_or_none(video.get('releaseYear')),
684ae102	172	'subtitles': subtitles,
bf6ec2fe	173	}