]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/turner.py
2 from __future__
import unicode_literals
6 from .adobepass
import AdobePassIE
7 from ..compat
import compat_str
23 class TurnerBaseIE(AdobePassIE
):
# Class-level dict used as a token cache: _add_akamai_spe_token reads it with
# .get(secure_path) and stores tokens under the same secure-path key.
24 _AKAMAI_SPE_TOKEN_CACHE
= {}
def _extract_timestamp(self, video_data):
    """Return the timestamp stored on the ``dateCreated`` element of *video_data*.

    The ``uts`` attribute of ``dateCreated`` is looked up with ``xpath_attr``
    and the raw value is then passed through ``int_or_none``.
    """
    raw_uts = xpath_attr(video_data, 'dateCreated', 'uts')
    return int_or_none(raw_uts)
# Return video_url with an Akamai token appended as the `hdnea` query
# parameter. The token is taken from _AKAMAI_SPE_TOKEN_CACHE when present,
# otherwise requested from the tokenizer at tokenizer_src.
#
# NOTE(review): the embedded original line numbers skip 32-35, 38, 45 and
# 48-49, so several statements of this method (among them the creation of
# `query` and the conditions guarding some branches) are not visible in this
# view. The comments below describe only what the visible lines do.
29 def _add_akamai_spe_token(self
, tokenizer_src
, video_url
, content_id
, ap_data
, custom_tokenizer_query
=None):
# Path portion of the URL (everything after the host up to and including the
# last '/') with a trailing '*'; this string is the cache key.
30 secure_path
= self
._search
_regex
(r
'https?://[^/]+(.+/)', video_url
, 'secure path') + '*'
# Look up a previously stored token for this path.
31 token
= self
._AKAMAI
_SPE
_TOKEN
_CACHE
.get(secure_path
)
# Caller-supplied tokenizer parameters are merged into the request query.
36 if custom_tokenizer_query
:
37 query
.update(custom_tokenizer_query
)
# NOTE(review): original line 38 is missing, so this assignment may belong to
# a branch that is not visible here - confirm against the full file.
39 query
['videoId'] = content_id
# When ap_data flags 'auth_required', an access token obtained from
# _extract_mvpd_auth is added; ap_data['site_name'] is passed twice.
40 if ap_data
.get('auth_required'):
41 query
['accessToken'] = self
._extract
_mvpd
_auth
(ap_data
['url'], content_id
, ap_data
['site_name'], ap_data
['site_name'])
# Request the token document (XML) from the tokenizer.
42 auth
= self
._download
_xml
(
43 tokenizer_src
, content_id
, query
=query
)
44 error_msg
= xpath_text(auth
, 'error/msg')
# The tokenizer's error message is raised as an expected ExtractorError.
# NOTE(review): the guarding condition (original line 45) is not visible;
# presumably this only happens when error_msg is non-empty.
46 raise ExtractorError(error_msg
, expected
=True)
47 token
= xpath_text(auth
, 'token')
# Store the token under the secure path for later calls.
50 self
._AKAMAI
_SPE
_TOKEN
_CACHE
[secure_path
] = token
51 return video_url
+ '?hdnea=' + token
# Download an XML video description and turn it into an info dict with
# formats, subtitles, thumbnails and metadata fields.
#
# path_data is read as a mapping with optional 'secure', 'default', 'f4m' and
# per-extension entries; ap_data is only read in the lines visible here.
#
# NOTE(review): the embedded original line numbers show many dropped lines in
# this method (e.g. 55, 57-59, 69-74, 80-81, 84, 104, 108, 112-113, 116,
# 121-129, 138, 140-141, 149, 153-154, 156-158, 160-161, 165, 169, 171,
# 173-177, 179, 186, 189-193, 195-197, 199, 205-209, 220-221). Branch headers,
# dict literals and the return statement are therefore only partially visible;
# the comments below describe the visible lines only.
53 def _extract_cvp_info(self
, data_src
, video_id
, path_data
={}, ap_data={}
, fatal
=False):
# Fetch the XML; the raw source is passed through fix_xml_ampersands and
# stripped before it is parsed.
54 video_data
= self
._download
_xml
(
56 transform_source
=lambda s
: fix_xml_ampersands(s
).strip(),
# From here on video_id is the 'id' attribute of the downloaded document.
60 video_id
= video_data
.attrib
['id']
# The headline is mandatory (fatal=True).
61 title
= xpath_text(video_data
, 'headline', fatal
=True)
# contentId falls back to the video id when absent or empty.
62 content_id
= xpath_text(video_data
, 'contentId') or video_id
63 # rtmp_src = xpath_text(video_data, 'akamai/src')
65 # split_rtmp_src = rtmp_src.split(',')
66 # if len(split_rtmp_src) == 2:
67 # rtmp_src = split_rtmp_src[1]
68 # aifp = xpath_text(video_data, 'akamai/aifp', default='')
# Pattern matching WIDTHxHEIGHT with an optional _BITRATE suffix.
# NOTE(review): the line that binds this pattern is not visible; presumably it
# is the `rex` object searched against each URL further down.
75 r
'(?P<width>[0-9]+)x(?P<height>[0-9]+)(?:_(?P<bitrate>[0-9]+))?')
76 # Possible formats locations: files/file, files/groupFiles/files
# Visit every <file> element at any depth of the document.
78 for video_file
in video_data
.findall('.//file'):
79 video_url
= url_or_none(video_file
.text
.strip())
82 ext
= determine_ext(video_url
)
# '/mp4:protected/' URLs: extraction is not implemented (see TODO below).
83 if video_url
.startswith('/mp4:protected/'):
85 # TODO Correct extraction for these files
86 # protected_path_data = path_data.get('protected')
87 # if not protected_path_data or not rtmp_src:
89 # protected_path = self._search_regex(
90 # r'/mp4:(.+)\.[a-z0-9]', video_url, 'secure path')
91 # auth = self._download_webpage(
92 # protected_path_data['tokenizer_src'], query={
93 # 'path': protected_path,
94 # 'videoId': content_id,
97 # token = xpath_text(auth, 'token')
100 # video_url = rtmp_src + video_url + '?' + token
# '/secure/' URLs are prefixed with the configured media_src and tokenised
# through _add_akamai_spe_token using path_data['secure'].
101 elif video_url
.startswith('/secure/'):
102 secure_path_data
= path_data
.get('secure')
103 if not secure_path_data
:
105 video_url
= self
._add
_akamai
_spe
_token
(
106 secure_path_data
['tokenizer_src'],
107 secure_path_data
['media_src'] + video_url
,
# Other non-absolute URLs are prefixed with media_src taken from the
# path_data entry for this extension, falling back to the 'default' entry.
109 elif not re
.match('https?://', video_url
):
110 base_path_data
= path_data
.get(ext
, path_data
.get('default', {}))
111 media_src
= base_path_data
.get('media_src')
114 video_url
= media_src
+ video_url
# `urls` records the URLs handled so far.
# NOTE(review): the body of this check (original line 116) is not visible;
# presumably already-seen URLs are skipped.
115 if video_url
in urls
:
117 urls
.append(video_url
)
# The <file> element's 'bitrate' attribute serves as the format id.
118 format_id
= video_file
.get('bitrate')
# Caption file extensions are collected as English ('en') subtitles.
119 if ext
in ('scc', 'srt', 'vtt'):
120 subtitles
.setdefault('en', []).append({
130 formats
.extend(self
._extract
_smil
_formats
(
131 video_url
, video_id
, fatal
=False))
# akamaihd.net URLs under /i/ or /z/ are handed to _extract_akamai_formats
# together with a host mapping.
132 elif re
.match(r
'https?://[^/]+\.akamaihd\.net/[iz]/', video_url
):
133 formats
.extend(self
._extract
_akamai
_formats
(
134 video_url
, video_id
, {
135 'hds': path_data
.get('f4m', {}).get('host'),
136 # nba.cdn.turner.com, ht.cdn.turner.com, ht2.cdn.turner.com
137 # ht3.cdn.turner.com, i.cdn.turner.com, s.cdn.turner.com
139 'http': 'pmd.cdn.turner.com',
142 m3u8_formats
= self
._extract
_m
3u8_formats
(
143 video_url
, video_id
, 'mp4',
144 m3u8_id
=format_id
or 'hls', fatal
=False)
# Tokenised secure HLS formats get the extra ffmpeg arguments '-seekable 0'.
145 if '/secure/' in video_url
and '?hdnea=' in video_url
:
146 for f
in m3u8_formats
:
147 f
['_ffmpeg_args'] = ['-seekable', '0']
148 formats
.extend(m3u8_formats
)
# The URL gets an 'hdcore' query parameter before f4m extraction.
150 formats
.extend(self
._extract
_f
4m
_formats
(
151 update_url_query(video_url
, {'hdcore': '3.7.0'}
),
152 video_id
, f4m_id
=format_id
or 'hds', fatal
=False))
155 'format_id': format_id
,
# Width, height and bitrate are read from the URL when the pattern matches.
159 mobj
= rex
.search(video_url
)
162 'width': int(mobj
.group('width')),
163 'height': int(mobj
.group('height')),
164 'tbr': int_or_none(mobj
.group('bitrate')),
# Otherwise a string format id is inspected: a purely numeric id is used as
# tbr, and ids of the form 'ios_audio' / 'ios_<digits>' are matched below.
166 elif isinstance(format_id
, compat_str
):
167 if format_id
.isdigit():
168 f
['tbr'] = int(format_id
)
170 mobj
= re
.match(r
'ios_(audio|[0-9]+)$', format_id
)
172 if mobj
.group(1) == 'audio':
178 f
['tbr'] = int(mobj
.group(1))
180 self
._sort
_formats
(formats
)
# Closed-caption tracks: each <track> of each closedCaptions/source element.
182 for source
in video_data
.findall('closedCaptions/source'):
183 for track
in source
.findall('track'):
184 track_url
= url_or_none(track
.get('url'))
# Tracks without a usable URL, or whose URL ends in '/big', are singled out.
# NOTE(review): the body (original line 186) is not visible; presumably they
# are skipped.
185 if not track_url
or track_url
.endswith('/big'):
# Language key: 'lang' attribute, then 'label', then 'en'.
187 lang
= track
.get('lang') or track
.get('label') or 'en'
188 subtitles
.setdefault(lang
, []).append({
# A lookup keyed by the source's 'format' attribute (the mapping itself is
# not visible here).
194 }.get(source
.get('format'))
# One entry per images/image element.
198 'id': image
.get('cut') or image
.get('name'),
200 'width': int_or_none(image
.get('width')),
201 'height': int_or_none(image
.get('height')),
202 } for image
in video_data
.findall('images/image'))
# True only when the isLive element's text is exactly 'true'.
204 is_live
= xpath_text(video_data
, 'isLive') == 'true'
210 'subtitles': subtitles
,
211 'thumbnails': thumbnails
,
212 'thumbnail': xpath_text(video_data
, 'poster'),
213 'description': strip_or_none(xpath_text(video_data
, 'description')),
# Duration comes from 'length', falling back to 'trt'.
214 'duration': parse_duration(xpath_text(video_data
, 'length') or xpath_text(video_data
, 'trt')),
215 'timestamp': self
._extract
_timestamp
(video_data
),
216 'upload_date': xpath_attr(video_data
, 'metas', 'version'),
217 'series': xpath_text(video_data
, 'showTitle'),
218 'season_number': int_or_none(xpath_text(video_data
, 'seasonNumber')),
219 'episode_number': int_or_none(xpath_text(video_data
, 'episodeNumber')),
def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
    """Collect HLS formats, chapters and duration for an NGTV media id.

    :param media_id: id interpolated into the medium.ngtv.io media URL.
    :param tokenizer_query: extra query passed to ``_add_akamai_spe_token``
        for streams whose ``playlistProtection`` is ``'spe'``.
    :param ap_data: optional mapping; its ``'is_live'`` entry is read here
        and the mapping is forwarded to ``_add_akamai_spe_token``. ``None``
        (the default) is treated as an empty mapping.
    :returns: dict with ``'formats'``, ``'chapters'`` and ``'duration'``.

    NOTE(review): the scraped source dropped a few lines of this method
    (initialisation of the accumulators, the ``continue`` guards, the
    ``chapters.append`` call and the ``return`` wrapper). They are
    reconstructed from the names the visible lines use - confirm against
    the full file.
    """
    # Fix: the default is None, and the original called ap_data.get() before
    # falling back to {} - normalise once so omitting ap_data cannot raise.
    ap_data = ap_data or {}
    is_live = ap_data.get('is_live')
    streams_data = self._download_json(
        'http://medium.ngtv.io/media/%s/tv' % media_id,
        media_id)['media']['tv']
    duration = None
    chapters = []
    formats = []
    for supported_type in ('unprotected', 'bulkaes'):
        stream_data = streams_data.get(supported_type, {})
        m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
        if not m3u8_url:
            # Nothing to extract for this stream type.
            continue
        if stream_data.get('playlistProtection') == 'spe':
            m3u8_url = self._add_akamai_spe_token(
                'http://token.ngtv.io/token/token_spe',
                m3u8_url, media_id, ap_data, tokenizer_query)
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, media_id, 'mp4', m3u8_id='hls', live=is_live, fatal=False))

        duration = float_or_none(stream_data.get('totalRuntime'))

        # Chapters are taken from the first stream that provides them and
        # never for live streams.
        if not chapters and not is_live:
            for chapter in stream_data.get('contentSegments', []):
                start_time = float_or_none(chapter.get('start'))
                chapter_duration = float_or_none(chapter.get('duration'))
                if start_time is None or chapter_duration is None:
                    continue
                chapters.append({
                    'start_time': start_time,
                    'end_time': start_time + chapter_duration,
                })
    self._sort_formats(formats)

    return {
        'formats': formats,
        'chapters': chapters,
        'duration': duration,
    }