]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/tvplay.py
[extractor/youtube] Misc cleanup
[yt-dlp.git] / yt_dlp / extractor / tvplay.py
1 import re
2
3 from .common import InfoExtractor
4 from ..compat import (
5 compat_HTTPError,
6 compat_urlparse,
7 )
8 from ..utils import (
9 determine_ext,
10 ExtractorError,
11 int_or_none,
12 parse_iso8601,
13 qualities,
14 traverse_obj,
15 try_get,
16 update_url_query,
17 url_or_none,
18 urljoin,
19 )
20
21
class TVPlayIE(InfoExtractor):
    """Extractor for MTG TVPlay pages and ``mtg:<id>`` pseudo-URLs.

    Metadata and stream URLs are read from the playapi.mtgx.tv v3 endpoints.
    """
    IE_NAME = 'mtg'
    IE_DESC = 'MTG services'
    _VALID_URL = r'''(?x)
                    (?:
                        mtg:|
                        https?://
                            (?:www\.)?
                            (?:
                                tvplay(?:\.skaties)?\.lv(?:/parraides)?|
                                (?:tv3play|play\.tv3)\.lt(?:/programos)?|
                                tv3play(?:\.tv3)?\.ee/sisu
                            )
                            /(?:[^/]+/)+
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [
        {
            'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
            'md5': 'a1612fe0849455423ad8718fe049be21',
            'info_dict': {
                'id': '418113',
                'ext': 'mp4',
                'title': 'Kādi ir īri? - Viņas melo labāk',
                'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
                'series': 'Viņas melo labāk',
                'season': '2.sezona',
                'season_number': 2,
                'duration': 25,
                'timestamp': 1406097056,
                'upload_date': '20140723',
            },
        },
        {
            'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
            'info_dict': {
                'id': '409229',
                'ext': 'flv',
                'title': 'Moterys meluoja geriau',
                'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
                'series': 'Moterys meluoja geriau',
                'episode_number': 47,
                'season': '1 sezonas',
                'season_number': 1,
                'duration': 1330,
                'timestamp': 1403769181,
                'upload_date': '20140626',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
            'info_dict': {
                'id': '238551',
                'ext': 'flv',
                'title': 'Kodu keset linna 398537',
                'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
                'duration': 1257,
                'timestamp': 1292449761,
                'upload_date': '20101215',
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
            'only_matching': True,
        },
        {
            'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
            'only_matching': True,
        },
        {
            # views is null
            'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
            'only_matching': True,
        },
        {
            'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
            'only_matching': True,
        },
        {
            'url': 'mtg:418113',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for the video whose numeric id is in *url*.

        Downloads the video JSON and the streams JSON, turns every usable
        stream URL into one or more formats (HDS, HLS, RTMP or direct), and
        collects subtitles and series/season metadata.

        Raises:
            ExtractorError: with the API-provided message (``expected=True``)
                when the streams request is answered with HTTP 403; any other
                download error is re-raised unchanged.
        """
        video_id = self._match_id(url)
        # The two-letter chunk after the first dot-separated host label is
        # used as the country for geo bypass (no match for ``mtg:`` URLs).
        geo_country = self._search_regex(
            r'https?://[^/]+\.([a-z]{2})', url,
            'geo country', default=None)
        if geo_country:
            self._initialize_geo_bypass({'countries': [geo_country.upper()]})
        video = self._download_json(
            'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')

        title = video['title']

        try:
            streams = self._download_json(
                'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id,
                video_id, 'Downloading streams JSON')
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                # The 403 body is JSON carrying a human-readable reason in `msg`
                msg = self._parse_json(e.cause.read().decode('utf-8'), video_id)
                raise ExtractorError(msg['msg'], expected=True)
            raise

        quality = qualities(['hls', 'medium', 'high'])
        # `streams` may be absent or null in the response; a plain
        # `.get('streams', {})` would hand back None for an explicit null.
        stream_urls = try_get(streams, lambda x: x['streams'], dict) or {}
        formats = []
        for format_id, video_url in stream_urls.items():
            video_url = url_or_none(video_url)
            if not video_url:
                continue
            ext = determine_ext(video_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    update_url_query(video_url, {
                        'hdcore': '3.5.0',
                        'plugin': 'aasp-3.5.0.151.81'
                    }), video_id, f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                fmt = {
                    'format_id': format_id,
                    'quality': quality(format_id),
                    'ext': ext,
                }
                if video_url.startswith('rtmp'):
                    # Split the RTMP URL into connection URL, app and play path
                    m = re.search(
                        r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
                    if not m:
                        continue
                    fmt.update({
                        'ext': 'flv',
                        'url': m.group('url'),
                        'app': m.group('app'),
                        'play_path': m.group('playpath'),
                        'preference': -1,
                    })
                else:
                    fmt.update({
                        'url': video_url,
                    })
                formats.append(fmt)

        if not formats and video.get('is_geo_blocked'):
            self.raise_geo_restricted(
                'This content might not be available in your country due to copyright reasons',
                metadata_available=True)

        # TODO: webvtt in m3u8
        subtitles = {}
        sami_path = video.get('sami_path')
        if sami_path:
            # Language comes from a `_xx.xml` suffix, else from the page's TLD
            lang = self._search_regex(
                r'_([a-z]{2})\.xml', sami_path, 'lang',
                default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1])
            subtitles[lang] = [{
                'url': sami_path,
            }]

        # Nested objects are looked up with traverse_obj so that a key that is
        # present but null yields None instead of raising AttributeError.
        series = video.get('format_title')
        episode_number = int_or_none(traverse_obj(video, ('format_position', 'episode')))
        season = traverse_obj(video, ('_embedded', 'season', 'title'))
        season_number = int_or_none(traverse_obj(video, ('format_position', 'season')))

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'series': series,
            'episode_number': episode_number,
            'season': season,
            'season_number': season_number,
            'duration': int_or_none(video.get('duration')),
            'timestamp': parse_iso8601(video.get('created_at')),
            'view_count': try_get(video, lambda x: x['views']['total'], int),
            'age_limit': int_or_none(video.get('age_limit', 0)),
            'formats': formats,
            'subtitles': subtitles,
        }
214
215
class TVPlayHomeIE(InfoExtractor):
    """Extractor for play.tv3.{lt,ee} and tv3play.skaties.lv episode, programme and clip pages.

    Product metadata and the HLS playlist are fetched from the site's own
    ``/api/products/...`` endpoints, resolved relative to the page URL.
    """
    _VALID_URL = r'''(?x)
            https?://
            (?:tv3?)?
            play\.(?:tv3|skaties)\.(?P<country>lv|lt|ee)/
            (?P<live>lives/)?
            [^?#&]+(?:episode|programme|clip)-(?P<id>\d+)
        '''
    _TESTS = [{
        'url': 'https://play.tv3.lt/series/gauju-karai-karveliai,serial-2343791/serija-8,episode-2343828',
        'info_dict': {
            'id': '2343828',
            'ext': 'mp4',
            'title': 'Gaujų karai. Karveliai (2021) | S01E08: Serija 8',
            'description': 'md5:f6fcfbb236429f05531131640dfa7c81',
            'duration': 2710,
            'season': 'Gaujų karai. Karveliai',
            'season_number': 1,
            'release_year': 2021,
            'episode': 'Serija 8',
            'episode_number': 8,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://play.tv3.lt/series/moterys-meluoja-geriau-n-7,serial-2574652/serija-25,episode-3284937',
        'info_dict': {
            'id': '3284937',
            'ext': 'mp4',
            'season': 'Moterys meluoja geriau [N-7]',
            'season_number': 14,
            'release_year': 2021,
            'episode': 'Serija 25',
            'episode_number': 25,
            'title': 'Moterys meluoja geriau [N-7] (2021) | S14|E25: Serija 25',
            'description': 'md5:c6926e9710f1a126f028fbe121eddb79',
            'duration': 2440,
        },
        'skip': '404'
    }, {
        'url': 'https://play.tv3.lt/lives/tv6-lt,live-2838694/optibet-a-lygos-rungtynes-marijampoles-suduva--vilniaus-riteriai,programme-3422014',
        'only_matching': True,
    }, {
        'url': 'https://tv3play.skaties.lv/series/women-lie-better-lv,serial-1024464/women-lie-better-lv,episode-1038762',
        'only_matching': True,
    }, {
        'url': 'https://play.tv3.ee/series/_,serial-2654462/_,episode-2654474',
        'only_matching': True,
    }, {
        'url': 'https://tv3play.skaties.lv/clips/tv3-zinas-valsti-lidz-15novembrim-bus-majsede,clip-3464509',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the product whose id is in *url*.

        URLs under ``lives/`` are looked up as live programmes and played back
        through the recording id from the product JSON with video type
        ``CATCHUP``; everything else is treated as a VOD with video type
        ``MOVIE``. Formats and subtitles come from the first HLS source of the
        playlist response.
        """
        # Group order in _VALID_URL: country, live, id
        country, is_live, video_id = self._match_valid_url(url).groups()

        api_path = 'lives/programmes' if is_live else 'vods'
        data = self._download_json(
            urljoin(url, f'/api/products/{api_path}/{video_id}?platform=BROWSER&lang={country.upper()}'),
            video_id)

        video_type = 'CATCHUP' if is_live else 'MOVIE'
        stream_id = data['programRecordingId'] if is_live else video_id
        stream = self._download_json(
            urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls')

        # NOTE(review): depending on the traverse_obj version, a path that
        # matches nothing may yield None rather than an empty list, so fall
        # back to an empty tuple before de-duplicating.
        # dict.fromkeys drops duplicate URLs while keeping first-seen order.
        thumbnail_urls = dict.fromkeys(traverse_obj(
            data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')),
            expected_type=url_or_none) or ())

        return {
            'id': video_id,
            'title': self._resolve_title(data),
            'description': traverse_obj(data, 'description', 'lead'),
            'duration': int_or_none(data.get('duration')),
            'season': traverse_obj(data, ('season', 'serial', 'title')),
            'season_number': int_or_none(traverse_obj(data, ('season', 'number'))),
            'episode': data.get('title'),
            'episode_number': int_or_none(data.get('episode')),
            'release_year': int_or_none(traverse_obj(data, ('season', 'serial', 'year'))),
            'thumbnails': [{'url': thumbnail_url, 'ext': 'jpg'} for thumbnail_url in thumbnail_urls],
            'formats': formats,
            'subtitles': subtitles,
        }

    @staticmethod
    def _resolve_title(data):
        """Return ``'<serial> (<year>) | SxxEyy: <title>'`` when the product
        JSON has the season/serial fields needed to build it; otherwise fall
        back to the plain ``title`` value (or None if that is missing too).
        """
        return try_get(data, lambda x: (
            f'{x["season"]["serial"]["title"]} ({x["season"]["serial"]["year"]}) | '
            f'S{x["season"]["number"]:02d}E{x["episode"]:02d}: {x["title"]}'
        )) or data.get('title')