]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/discoverygo.py
[extractor/generic] Don't return JW player without formats
[yt-dlp.git] / yt_dlp / extractor / discoverygo.py
CommitLineData
70497994
S
1import re
2
d92cb463 3from .common import InfoExtractor
d92cb463 4from ..utils import (
c402e7f3 5 determine_ext,
d92cb463 6 extract_attributes,
70497994 7 ExtractorError,
d92cb463
S
8 int_or_none,
9 parse_age_limit,
70497994
S
10 remove_end,
11 unescapeHTML,
3052a30d 12 url_or_none,
d92cb463
S
13)
14
15
70497994
S
class DiscoveryGoBaseIE(InfoExtractor):
    """Shared base for the Discovery GO extractors.

    Provides the URL template matched by the concrete extractors and the
    helper that turns a decoded video/stream pair into an info dict.
    """

    # Verbose regex; the single ``%s`` is filled in by subclasses with the
    # path prefix that precedes the captured ``id`` path component.
    _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocitychannel
        )go\.com/%s(?P<id>[^/?#&]+)'''

    @staticmethod
    def _dict_field(container, key):
        """Return ``container[key]`` if it is a dict, otherwise an empty dict."""
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    def _extract_video_info(self, video, stream, display_id):
        """Build the info dict for one video.

        Parameters:
            video:      decoded video metadata dict; ``video['name']`` is
                        required and is used as the title.
            stream:     dict holding the stream URLs and optional
                        ``captions`` list; may be empty/None.
            display_id: identifier used for logging and as the fallback
                        ``id`` when ``video`` has none.

        Returns a dict with id, display_id, title, description, duration,
        series, season_number, episode_number, tags, age_limit, formats and
        subtitles.

        Raises KeyError if ``video`` has no ``name`` and ExtractorError if
        ``stream`` is empty.
        """
        title = video['name']

        if not stream:
            if video.get('authenticated') is True:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            else:
                raise ExtractorError('Unable to find stream')

        formats = []

        hls_url = stream.get('streamUrl')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, display_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        # The key used to be built with 'streamUrl'.capitalize(), which
        # lowercases everything after the first character and therefore
        # looked up 'hdsStreamurl'. Presumably the real key is the camelCase
        # 'hdsStreamUrl' (TODO confirm against a live response); the old
        # spelling is kept as a fallback so nothing that worked stops working.
        hds_url = stream.get('hdsStreamUrl') or stream.get('hdsStreamurl')
        if hds_url:
            formats.extend(self._extract_f4m_formats(
                hds_url, display_id, f4m_id='hds', fatal=False))

        self._sort_formats(formats)

        video_id = video.get('id') or display_id
        # Nested objects may be missing, null or of an unexpected type;
        # _dict_field normalises all of those to an empty dict.
        description = self._dict_field(video, 'description').get('detailed')
        duration = int_or_none(video.get('duration'))

        series = self._dict_field(video, 'show').get('name')
        season_number = int_or_none(
            self._dict_field(video, 'season').get('number'))
        episode_number = int_or_none(video.get('episodeNumber'))

        tags = video.get('tags')
        age_limit = parse_age_limit(
            self._dict_field(video, 'parental').get('rating'))

        subtitles = {}
        captions = stream.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                # Skip malformed entries instead of crashing on .get().
                if not isinstance(caption, dict):
                    continue
                subtitle_url = url_or_none(caption.get('fileUrl'))
                if not subtitle_url or not subtitle_url.startswith('http'):
                    continue
                # ``or`` also covers a key that is present but null/empty.
                lang = caption.get('fileLang') or 'en'
                ext = determine_ext(subtitle_url)
                subtitles.setdefault(lang, []).append({
                    'url': subtitle_url,
                    # Caption files with an .xml extension are labelled ttml.
                    'ext': 'ttml' if ext == 'xml' else ext,
                })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'duration': duration,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'tags': tags,
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
        }
70497994
S
94
95
cb0c2310
RA
class DiscoveryGoIE(DiscoveryGoBaseIE):
    """Extractor for a single Discovery GO video page.

    Matches URLs with at least one path component before the final one,
    which is captured as the display id.
    """

    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
    _GEO_COUNTRIES = ['US']
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
        'info_dict': {
            'id': '58c167d86b66d12f2addeb01',
            'ext': 'mp4',
            'title': 'Reaper Madness',
            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
            'duration': 2519,
            'series': 'Bering Sea Gold',
            'season_number': 8,
            'episode_number': 6,
            'age_limit': 14,
        },
    }

    def _real_extract(self, url):
        """Download the page, decode the embedded video JSON and extract it.

        The video metadata is read from the ``data-video`` (or, failing
        that, ``data-json``) attribute of the ``video-player-container``
        div. Raises ExtractorError when neither attribute carries data.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        container = extract_attributes(
            self._search_regex(
                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
                webpage, 'video container'))

        video_json = container.get('data-video') or container.get('data-json')
        if not video_json:
            # Without this guard None would reach the JSON parser and
            # surface as a TypeError instead of an extraction error.
            raise ExtractorError('Unable to find video data')

        video = self._parse_json(video_json, display_id)

        stream = video.get('stream')

        return self._extract_video_info(video, stream, display_id)
131
132
70497994
S
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
    """Extractor for a Discovery GO show page, returned as a playlist.

    Matches URLs whose path consists of a single component; anything that
    DiscoveryGoIE already accepts is left to that extractor.
    """

    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/',
        'info_dict': {
            'id': 'bering-sea-gold',
            'title': 'Bering Sea Gold',
            'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
        },
        'playlist_mincount': 6,
    }

    @classmethod
    def suitable(cls, url):
        """Return False for URLs DiscoveryGoIE handles, else defer to the base check."""
        if DiscoveryGoIE.suitable(url):
            return False
        return super(DiscoveryGoPlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect every episode referenced on the show page into a playlist."""
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        def iter_episode_results():
            # Each ``data-json`` attribute holds an HTML-escaped JSON object;
            # only objects of type 'episode' with a socialUrl become entries.
            for match in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', page):
                item = self._parse_json(
                    match.group('json'), playlist_id,
                    transform_source=unescapeHTML, fatal=False)
                if not isinstance(item, dict):
                    continue
                if item.get('type') != 'episode':
                    continue
                target = item.get('socialUrl')
                if not target:
                    continue
                yield self.url_result(
                    target, ie=DiscoveryGoIE.ie_key(),
                    video_id=item.get('id'))

        entries = list(iter_episode_results())

        og_title = self._og_search_title(page, fatal=False)
        playlist_title = remove_end(og_title, ' | Discovery GO')
        playlist_description = self._og_search_description(page)

        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)