]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/zype.py
[dplay] Add extractors for site changes (#2401)
[yt-dlp.git] / yt_dlp / extractor / zype.py
CommitLineData
83852e57
S
1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
6from .common import InfoExtractor
a9866c03
RA
7from ..compat import compat_HTTPError
8from ..utils import (
9 dict_get,
10 ExtractorError,
11 int_or_none,
12 js_to_json,
13 parse_iso8601,
14)
83852e57
S
15
16
class ZypeIE(InfoExtractor):
    """Extractor for Zype embed-player URLs (``player.zype.com/embed/<id>.{js,json,html}``)."""

    # Hexadecimal video identifier used in embed URLs.
    _ID_RE = r'[\da-fA-F]+'
    # Scheme-less embed URL up to (and including) the auth-key parameter name;
    # the single ``%s`` placeholder receives the id pattern.
    _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
    _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
    _TEST = {
        'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
        'md5': 'eaee31d474c76a955bdaba02a505c595',
        'info_dict': {
            'id': '5b400b834b32992a310622b9',
            'ext': 'mp4',
            'title': 'Smoky Barbecue Favorites',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
            'timestamp': 1504915200,
            'upload_date': '20170909',
        },
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all Zype embed ``<script>`` tags found in *webpage*."""
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
                webpage)]

    def _real_extract(self, url):
        """Fetch the ``.json`` variant of the embed URL and build the info dict.

        Raises ExtractorError (``expected=True``) carrying the server-provided
        ``message`` when the JSON request fails with HTTP 400, 401 or 403; any
        other ExtractorError is re-raised unchanged.
        """
        video_id = self._match_id(url)

        try:
            # The .js/.html embed endpoints are rewritten to their .json form.
            response = self._download_json(re.sub(
                r'\.(?:js|html)\?', '.json?', url), video_id)['response']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403):
                raise ExtractorError(self._parse_json(
                    e.cause.read().decode(), video_id)['message'], expected=True)
            raise

        body = response['body']
        video = response['video']
        title = video['title']

        subtitles = {}

        if isinstance(body, dict):
            # Structured response: 'outputs' lists the available renditions.
            formats = []
            # `or []` also covers an explicit JSON null for 'outputs'.
            for output in body.get('outputs') or []:
                output_url = output.get('url')
                if not output_url:
                    continue
                name = output.get('name')
                if name == 'm3u8':
                    m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                        output_url, video_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False)
                    # Accumulate instead of rebinding, so renditions collected
                    # from earlier outputs are not discarded.
                    formats.extend(m3u8_formats or [])
                    for lang, tracks in (m3u8_subs or {}).items():
                        subtitles.setdefault(lang, []).extend(tracks)
                else:
                    f = {
                        'format_id': name,
                        'tbr': int_or_none(output.get('bitrate')),
                        'url': output_url,
                    }
                    if name in ('m4a', 'mp3'):
                        # Audio-only outputs.
                        f['vcodec'] = 'none'
                    else:
                        f.update({
                            'height': int_or_none(output.get('height')),
                            'width': int_or_none(output.get('width')),
                        })
                    formats.append(f)
            text_tracks = body.get('subtitles') or []
        else:
            # Non-dict body (treated as text): look for a quoted string
            # containing ".m3u8".
            m3u8_url = self._search_regex(
                r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
                body, 'm3u8 url', group='url', default=None)
            if not m3u8_url:
                # Fall back to the first object of the `sources: [...]` array.
                source = self._search_regex(
                    r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, 'source')

                def get_attr(key):
                    # Read a quoted `key: "value"` attribute out of `source`.
                    return self._search_regex(
                        r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key,
                        source, key, group='val')

                if get_attr('integration') == 'verizon-media':
                    m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id')
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
            text_tracks = self._search_regex(
                r'textTracks\s*:\s*(\[[^]]+\])',
                body, 'text tracks', default=None)
            if text_tracks:
                # Non-fatal parse; a falsy result is skipped by the check below.
                text_tracks = self._parse_json(
                    text_tracks, video_id, js_to_json, False)
        self._sort_formats(formats)

        if text_tracks:
            for text_track in text_tracks:
                tt_url = dict_get(text_track, ('file', 'src'))
                if not tt_url:
                    continue
                # Tracks are keyed by their label, defaulting to 'English'.
                subtitles.setdefault(text_track.get('label') or 'English', []).append({
                    'url': tt_url,
                })

        thumbnails = []
        # `or []` also covers an explicit JSON null for 'thumbnails'.
        for thumbnail in video.get('thumbnails') or []:
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        return {
            'id': video_id,
            'display_id': video.get('friendly_title'),
            'title': title,
            'thumbnails': thumbnails,
            'description': dict_get(video, ('description', 'ott_description', 'short_description')),
            'timestamp': parse_iso8601(video.get('published_at')),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('request_count')),
            'average_rating': int_or_none(video.get('rating')),
            'season_number': int_or_none(video.get('season')),
            'episode_number': int_or_none(video.get('episode')),
            'formats': formats,
            'subtitles': subtitles,
        }