]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/zype.py
6f2fbb9e9d6e59d648134fae87967f7073ca6d3f
[yt-dlp.git] / yt_dlp / extractor / zype.py
1 import re
2
3 from .common import InfoExtractor
4 from ..compat import compat_HTTPError
5 from ..utils import (
6 dict_get,
7 ExtractorError,
8 int_or_none,
9 js_to_json,
10 parse_iso8601,
11 )
12
13
class ZypeIE(InfoExtractor):
    """Extractor for videos embedded through the player.zype.com embed endpoint."""

    _ID_RE = r'[\da-fA-F]+'
    _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
    _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
    _TEST = {
        'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
        'md5': 'eaee31d474c76a955bdaba02a505c595',
        'info_dict': {
            'id': '5b400b834b32992a310622b9',
            'ext': 'mp4',
            'title': 'Smoky Barbecue Favorites',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
            'timestamp': 1504915200,
            'upload_date': '20170909',
        },
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URL of every Zype embed ``<script>`` tag found in *webpage*."""
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for the Zype embed at *url*.

        Raises ExtractorError (``expected=True``) carrying the server's
        ``message`` when the JSON endpoint answers with HTTP 400/401/403, and
        ExtractorError when a non-JSON player body yields no HLS manifest URL.
        """
        video_id = self._match_id(url)

        try:
            # Whatever embed flavour was matched (.js/.html), query the
            # .json variant of the same URL.
            response = self._download_json(re.sub(
                r'\.(?:js|html)\?', '.json?', url), video_id)['response']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403):
                # Surface the message from the JSON error body to the user.
                raise ExtractorError(self._parse_json(
                    e.cause.read().decode(), video_id)['message'], expected=True)
            raise

        body = response['body']
        video = response['video']
        title = video['title']

        subtitles = {}

        if isinstance(body, dict):
            formats = []
            # `or []` also covers an explicit JSON null for "outputs".
            for output in body.get('outputs') or []:
                output_url = output.get('url')
                if not output_url:
                    continue
                name = output.get('name')
                if name == 'm3u8':
                    m3u8_formats, m3u8_subtitles = self._extract_m3u8_formats_and_subtitles(
                        output_url, video_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False)
                    # Accumulate rather than rebind: rebinding `formats` here
                    # would drop the formats gathered from earlier outputs.
                    formats.extend(m3u8_formats)
                    for lang, tracks in m3u8_subtitles.items():
                        subtitles.setdefault(lang, []).extend(tracks)
                else:
                    f = {
                        'format_id': name,
                        'tbr': int_or_none(output.get('bitrate')),
                        'url': output_url,
                    }
                    if name in ('m4a', 'mp3'):
                        # Audio-only outputs.
                        f['vcodec'] = 'none'
                    else:
                        f.update({
                            'height': int_or_none(output.get('height')),
                            'width': int_or_none(output.get('width')),
                        })
                    formats.append(f)
            text_tracks = body.get('subtitles') or []
        else:
            # `body` is not a dict; it is searched as text for a quoted
            # string containing ".m3u8".
            m3u8_url = self._search_regex(
                r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
                body, 'm3u8 url', group='url', default=None)
            if not m3u8_url:
                source = self._search_regex(
                    r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')

                def get_attr(key):
                    # Read a quoted `key: "value"` attribute out of the first
                    # `sources` entry.
                    return self._search_regex(
                        r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
                        source, key, group='val')

                if get_attr('integration') == 'verizon-media':
                    m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
            if not m3u8_url:
                # Fail with a clear message instead of handing None to the
                # HLS manifest downloader.
                raise ExtractorError('Unable to find an m3u8 URL in the player body')
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
            text_tracks = self._search_regex(
                r'textTracks\s*:\s*(\[[^]]+\])',
                body, 'text tracks', default=None)
            if text_tracks:
                # Parsed via js_to_json; a parse failure is non-fatal and
                # leaves text_tracks falsy.
                text_tracks = self._parse_json(
                    text_tracks, video_id, js_to_json, False)
        self._sort_formats(formats)

        if text_tracks:
            for text_track in text_tracks:
                tt_url = dict_get(text_track, ('file', 'src'))
                if not tt_url:
                    continue
                # Tracks without a label are filed under 'English'.
                subtitles.setdefault(text_track.get('label') or 'English', []).append({
                    'url': tt_url,
                })

        thumbnails = []
        # `or []` also covers an explicit JSON null for "thumbnails".
        for thumbnail in video.get('thumbnails') or []:
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        return {
            'id': video_id,
            'display_id': video.get('friendly_title'),
            'title': title,
            'thumbnails': thumbnails,
            'description': dict_get(video, ('description', 'ott_description', 'short_description')),
            'timestamp': parse_iso8601(video.get('published_at')),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('request_count')),
            'average_rating': int_or_none(video.get('rating')),
            'season_number': int_or_none(video.get('season')),
            'episode_number': int_or_none(video.get('episode')),
            'formats': formats,
            'subtitles': subtitles,
        }