]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/go.py
[mildom] Remove proxy (#260)
[yt-dlp.git] / yt_dlp / extractor / go.py
CommitLineData
2c3e0af9
RA
1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
ae8d5a5c 6from .adobepass import AdobePassIE
2c3e0af9
RA
7from ..utils import (
8 int_or_none,
9 determine_ext,
10 parse_age_limit,
3b4775e0 11 remove_start,
12 remove_end,
014b7e6b
RA
13 urlencode_postdata,
14 ExtractorError,
2c3e0af9
RA
15)
16
17
ae8d5a5c
RA
class GoIE(AdobePassIE):
    """Extractor for the go.com family of sites (ABC, Freeform, DisneyNOW, FXNOW).

    Single videos are identified by a ``VDKA...`` id taken from the URL or
    scraped from the page; pages that only expose a ``SH...`` show id are
    returned as a playlist of the show's videos.
    """

    # Per-site settings keyed by the (normalised) sub-domain of the URL.
    # 'brand' is the code passed to the contents/entitlement APIs;
    # 'requestor_id' / 'resource_id' are used for MVPD authentication.
    _SITE_INFO = {
        'abc': {
            'brand': '001',
            'requestor_id': 'ABC',
        },
        'freeform': {
            'brand': '002',
            'requestor_id': 'ABCFamily',
        },
        'watchdisneychannel': {
            'brand': '004',
            'resource_id': 'Disney',
        },
        'watchdisneyjunior': {
            'brand': '008',
            'resource_id': 'DisneyJunior',
        },
        'watchdisneyxd': {
            'brand': '009',
            'resource_id': 'DisneyXD',
        },
        'disneynow': {
            'brand': '011',
            'resource_id': 'Disney',
        },
        'fxnow.fxnetworks': {
            'brand': '025',
            'requestor_id': 'dtci',
        },
    }
    # The site keys are regex-escaped before being interpolated so that the
    # dot in 'fxnow.fxnetworks' only matches a literal dot.
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<sub_domain>
                            (?:(?:%s)\.)?go|fxnow\.fxnetworks|
                            (?:www\.)?(?:abc|freeform|disneynow)
                        )\.com/
                        (?:
                            (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
                            (?:[^/]+/)*(?P<display_id>[^/?\#]+)
                        )
                    ''' % '|'.join(map(re.escape, _SITE_INFO))
    _TESTS = [{
        'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
        'info_dict': {
            'id': 'VDKA3807643',
            'ext': 'mp4',
            'title': 'The Traitor in the White House',
            'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'http://watchdisneyxd.go.com/doraemon',
        'info_dict': {
            'title': 'Doraemon',
            'id': 'SH55574025',
        },
        'playlist_mincount': 51,
    }, {
        'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
        'info_dict': {
            'id': 'VDKA3609139',
            'ext': 'mp4',
            'title': 'This Guilty Blood',
            'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
            'age_limit': 14,
        },
        'params': {
            'geo_bypass_ip_block': '3.244.239.0/24',
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
        'info_dict': {
            'id': 'VDKA13435179',
            'ext': 'mp4',
            'title': 'The Bet',
            'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
            'age_limit': 14,
        },
        'params': {
            'geo_bypass_ip_block': '3.244.239.0/24',
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
        'info_dict': {
            'id': 'VDKA12782841',
            'ext': 'mp4',
            'title': 'First Look: Better Things - Season 2',
            'description': 'md5:fa73584a95761c605d9d54904e35b407',
        },
        'params': {
            'geo_bypass_ip_block': '3.244.239.0/24',
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
        'only_matching': True,
    }, {
        'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
        'only_matching': True,
    }, {
        # brand 004
        'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
        'only_matching': True,
    }, {
        # brand 008
        'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
        'only_matching': True,
    }, {
        'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
        'only_matching': True,
    }, {
        'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner',
        'only_matching': True,
    }]

    def _extract_videos(self, brand, video_id='-1', show_id='-1'):
        """Fetch the 'video' entry of the contents API response.

        Either ``video_id`` or ``show_id`` is expected to be given; the
        string '-1' is what gets sent in the URL for the one left unset.
        The id used for the download log is ``video_id`` when set,
        otherwise ``show_id``.
        """
        display_id = video_id if video_id != '-1' else show_id
        return self._download_json(
            'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
            display_id)['video']

    def _real_extract(self, url):
        """Return the info dict for a single video, or a playlist for a show page.

        Raises ExtractorError when the API reports entitlement errors or
        returns no data for the video; geo-restriction (error code 1002) is
        reported through ``raise_geo_restricted``.
        """
        mobj = re.match(self._VALID_URL, url)
        # Normalise e.g. 'abc.go' / 'www.freeform' to the _SITE_INFO key.
        sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
        video_id, display_id = mobj.group('id', 'display_id')
        site_info = self._SITE_INFO.get(sub_domain, {})
        brand = site_info.get('brand')
        if not video_id or not site_info:
            # Either the URL carries no VDKA id or the site is not known
            # from the host name alone: scrape the page for what is missing.
            webpage = self._download_webpage(url, display_id or video_id)
            video_id = self._search_regex(
                (
                    # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
                    # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
                    r'data-video-id=["\']*(VDKA\w+)',
                    # https://github.com/ytdl-org/youtube-dl/pull/25216/files
                    # The following is based on the pull request on the line above. Changed the ABC.com URL to a show available now.
                    # https://abc.com/shows/the-rookie/episode-guide/season-02/19-the-q-word
                    r'\bvideoIdCode["\']\s*:\s*["\'](vdka\w+)',
                    # Deprecated fallback pattern
                    r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
                ), webpage, 'video id', default=video_id)
            if not site_info:
                brand = self._search_regex(
                    (r'data-brand=\s*["\']\s*(\d+)',
                     r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
                    default='004')
                # Fall back to an empty mapping when the scraped brand is not
                # listed in _SITE_INFO; without the default, next() would
                # raise a bare StopIteration here.
                site_info = next(
                    (si for si in self._SITE_INFO.values()
                     if si.get('brand') == brand), {})
            if not video_id:
                # show extraction works for Disney, DisneyJunior and DisneyXD
                # ABC and Freeform has different layout
                show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
                videos = self._extract_videos(brand, show_id=show_id)
                show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
                entries = [
                    self.url_result(
                        video['url'], 'Go', video.get('id'), video.get('title'))
                    for video in videos]
                entries.reverse()
                return self.playlist_result(entries, show_id, show_title)

        videos = self._extract_videos(brand, video_id)
        if not videos:
            # An empty result would otherwise surface as an IndexError.
            raise ExtractorError('Unable to find video data for %s' % video_id)
        video_data = videos[0]
        video_id = video_data['id']
        title = video_data['title']

        formats = []
        # "x.get(k) or {}" is used instead of "x.get(k, {})" so that an
        # explicit null in the JSON does not break the chained lookup.
        for asset in (video_data.get('assets') or {}).get('asset') or []:
            asset_url = asset.get('value')
            if not asset_url:
                continue
            format_id = asset.get('format')
            ext = determine_ext(asset_url)
            if ext == 'm3u8':
                video_type = video_data.get('type')
                data = {
                    'video_id': video_data['id'],
                    'video_type': video_type,
                    'brand': brand,
                    'device': '001',
                }
                if video_data.get('accesslevel') == '1':
                    # This access level goes through MVPD authentication.
                    requestor_id = site_info.get('requestor_id', 'DisneyChannels')
                    resource = site_info.get('resource_id') or self._get_mvpd_resource(
                        requestor_id, title, video_id, None)
                    auth = self._extract_mvpd_auth(
                        url, video_id, requestor_id, resource)
                    data.update({
                        'token': auth,
                        'token_type': 'ap',
                        'adobe_requestor_id': requestor_id,
                    })
                else:
                    self._initialize_geo_bypass({'countries': ['US']})
                entitlement = self._download_json(
                    'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
                    video_id, data=urlencode_postdata(data))
                errors = (entitlement.get('errors') or {}).get('errors') or []
                if errors:
                    for error in errors:
                        if error.get('code') == 1002:
                            self.raise_geo_restricted(
                                error['message'], countries=['US'])
                    error_message = ', '.join([error['message'] for error in errors])
                    raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
                # The session key from the entitlement response is appended
                # to the manifest URL as its query string.
                asset_url += '?' + entitlement['uplynkData']['sessionKey']
                formats.extend(self._extract_m3u8_formats(
                    asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
            else:
                f = {
                    'format_id': format_id,
                    'url': asset_url,
                    'ext': ext,
                }
                format_prefix = '%s-' % format_id if format_id else ''
                if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
                    f.update({
                        'format_id': format_prefix + 'SOURCE',
                        'quality': 1,
                    })
                else:
                    # Derive the resolution from a ".../<width>x<height>/..."
                    # path component when there is one.
                    mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
                    if mobj:
                        height = int(mobj.group(2))
                        f.update({
                            'format_id': format_prefix + '%dP' % height,
                            'width': int(mobj.group(1)),
                            'height': height,
                        })
                formats.append(f)
        self._sort_formats(formats)

        subtitles = {}
        for cc in (video_data.get('closedcaption') or {}).get('src') or []:
            cc_url = cc.get('value')
            if not cc_url:
                continue
            ext = determine_ext(cc_url)
            if ext == 'xml':
                ext = 'ttml'
            subtitles.setdefault(cc.get('lang'), []).append({
                'url': cc_url,
                'ext': ext,
            })

        thumbnails = []
        for thumbnail in (video_data.get('thumbnails') or {}).get('thumbnail') or []:
            thumbnail_url = thumbnail.get('value')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('longdescription') or video_data.get('description'),
            # The duration value is divided by 1000 (presumably milliseconds).
            'duration': int_or_none((video_data.get('duration') or {}).get('value'), 1000),
            'age_limit': parse_age_limit((video_data.get('tvrating') or {}).get('rating')),
            'episode_number': int_or_none(video_data.get('episodenumber')),
            'series': (video_data.get('show') or {}).get('title'),
            'season_number': int_or_none((video_data.get('season') or {}).get('num')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
294 }