jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/discoverygo.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / discoverygo.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 determine_ext,
7 extract_attributes,
8 int_or_none,
9 parse_age_limit,
10 remove_end,
11 unescapeHTML,
12 url_or_none,
13 )
14
15
class DiscoveryGoBaseIE(InfoExtractor):
    """Shared URL template and metadata extraction for the "*go.com" sites."""

    # The single %s slot receives an optional path-prefix pattern supplied by
    # each subclass; the final path component is captured as the `id` group.
    _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocitychannel
        )go\.com/%s(?P<id>[^/?#&]+)'''

    @staticmethod
    def _nested_value(data, outer_key, inner_key):
        """Return data[outer_key][inner_key], or None when either level is absent.

        Unlike ``data.get(outer_key, {}).get(inner_key)`` this does not raise
        when the outer key is present but holds ``None`` or a non-dict value.
        """
        inner = data.get(outer_key)
        return inner.get(inner_key) if isinstance(inner, dict) else None

    def _extract_video_info(self, video, stream, display_id):
        """Build the info dict for one video from its parsed JSON metadata.

        Args:
            video: dict of video metadata; must contain the key 'name'.
            stream: dict holding the stream URLs and an optional 'captions'
                list; may be empty or None.
            display_id: identifier used for requests/log messages and as the
                fallback video id.

        Returns:
            dict with id, display_id, title, description, duration, series,
            season_number, episode_number, tags, age_limit, formats and
            subtitles.

        Raises:
            KeyError: if ``video`` has no 'name'.
            ExtractorError: if ``stream`` is empty.
        """
        title = video['name']

        if not stream:
            if video.get('authenticated') is True:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            else:
                raise ExtractorError('Unable to find stream')

        formats = []

        hls_url = stream.get('streamUrl')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, display_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        # The HDS key used to be derived with 'streamUrl'.capitalize(), but
        # str.capitalize() lower-cases everything after the first character,
        # producing 'hdsStreamurl'. Look up the camelCase key first and keep
        # the previously used spelling as a fallback so nothing regresses.
        hds_url = stream.get('hdsStreamUrl') or stream.get('hdsStreamurl')
        if hds_url:
            formats.extend(self._extract_f4m_formats(
                hds_url, display_id, f4m_id='hds', fatal=False))

        video_id = video.get('id') or display_id
        description = self._nested_value(video, 'description', 'detailed')
        duration = int_or_none(video.get('duration'))

        series = self._nested_value(video, 'show', 'name')
        season_number = int_or_none(self._nested_value(video, 'season', 'number'))
        episode_number = int_or_none(video.get('episodeNumber'))

        tags = video.get('tags')
        age_limit = parse_age_limit(self._nested_value(video, 'parental', 'rating'))

        subtitles = {}
        captions = stream.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                # Skip malformed entries instead of crashing on `.get`.
                if not isinstance(caption, dict):
                    continue
                subtitle_url = url_or_none(caption.get('fileUrl'))
                if not subtitle_url or not subtitle_url.startswith('http'):
                    continue
                # `or` also covers an explicit null language.
                lang = caption.get('fileLang') or 'en'
                ext = determine_ext(subtitle_url)
                subtitles.setdefault(lang, []).append({
                    'url': subtitle_url,
                    # Captions served as .xml are labelled as TTML.
                    'ext': 'ttml' if ext == 'xml' else ext,
                })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'duration': duration,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'tags': tags,
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
        }
93
94
class DiscoveryGoIE(DiscoveryGoBaseIE):
    """Extractor for a single video page (URL with at least one path segment
    before the final id component)."""

    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
    _GEO_COUNTRIES = ['US']
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
        'info_dict': {
            'id': '58c167d86b66d12f2addeb01',
            'ext': 'mp4',
            'title': 'Reaper Madness',
            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
            'duration': 2519,
            'series': 'Bering Sea Gold',
            'season_number': 8,
            'episode_number': 6,
            'age_limit': 14,
        },
    }

    def _real_extract(self, url):
        """Download the page, read the player container's JSON and return the info dict.

        Raises:
            ExtractorError: if the player container carries neither a
                'data-video' nor a 'data-json' attribute (in addition to
                whatever the helper methods raise).
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The video metadata is embedded as JSON in an attribute of the
        # player container's opening <div> tag.
        container = extract_attributes(
            self._search_regex(
                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
                webpage, 'video container'))

        video_json = container.get('data-video') or container.get('data-json')
        if not video_json:
            # Fail with a clear extractor error rather than handing None to
            # the JSON parser, which would surface as an opaque type error.
            raise ExtractorError('Unable to find video data in the player container')

        video = self._parse_json(video_json, display_id)

        stream = video.get('stream')

        return self._extract_video_info(video, stream, display_id)
130
131
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
    """Extractor for a show page: collects the episodes embedded in the page
    as URL results handled by DiscoveryGoIE."""

    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/',
        'info_dict': {
            'id': 'bering-sea-gold',
            'title': 'Bering Sea Gold',
            'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
        },
        'playlist_mincount': 6,
    }

    @classmethod
    def suitable(cls, url):
        """Return False for URLs that DiscoveryGoIE accepts; otherwise defer
        to the inherited check."""
        if DiscoveryGoIE.suitable(url):
            return False
        return super(DiscoveryGoPlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Scan the page's data-json attributes and return a playlist of the
        entries whose type is 'episode' and that carry a 'socialUrl'."""
        playlist_id = self._match_id(url)

        page = self._download_webpage(url, playlist_id)

        episodes = []
        for match in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', page):
            # Attribute values are HTML-escaped; unescape before parsing and
            # tolerate blobs that fail to parse (fatal=False).
            item = self._parse_json(
                match.group('json'), playlist_id,
                transform_source=unescapeHTML, fatal=False)
            is_episode = isinstance(item, dict) and item.get('type') == 'episode'
            if not is_episode:
                continue
            target_url = item.get('socialUrl')
            if target_url:
                episodes.append(self.url_result(
                    target_url, ie=DiscoveryGoIE.ie_key(),
                    video_id=item.get('id')))

        # Drop the site suffix from the Open Graph title.
        playlist_title = remove_end(
            self._og_search_title(page, fatal=False), ' | Discovery GO')
        playlist_description = self._og_search_description(page)

        return self.playlist_result(
            episodes, playlist_id, playlist_title, playlist_description)