]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/zaiko.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / zaiko.py
1 import base64
2
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 extract_attributes,
7 int_or_none,
8 str_or_none,
9 traverse_obj,
10 try_call,
11 unescapeHTML,
12 url_basename,
13 url_or_none,
14 )
15
16
class ZaikoBaseIE(InfoExtractor):
    """Helpers shared by the Zaiko extractors defined in this module."""

    def _download_real_webpage(self, url, video_id):
        """Download *url* and return its content, rejecting redirect targets.

        The URL of the final response is inspected: a URL containing
        ``zaiko.io/login`` triggers ``raise_login_required()``, and a URL
        containing ``/_buy/`` raises an expected ExtractorError stating that
        the account holds no ticket for the event.
        """
        page, handle = self._download_webpage_handle(url, video_id)
        landed_on = handle.url
        if 'zaiko.io/login' in landed_on:
            self.raise_login_required()
        elif '/_buy/' in landed_on:
            raise ExtractorError('Your account does not have tickets to this event', expected=True)
        return page

    def _parse_vue_element_attr(self, name, string, video_id):
        """Return the JSON-valued ``:``-prefixed attributes of a ``<name ...>`` tag.

        The first opening tag for *name* found in *string* is parsed; every
        attribute whose key starts with ``:`` is HTML-unescaped and decoded as
        JSON (non-fatally), and stored under the key without the leading colon.
        Other attributes are ignored.
        """
        opening_tag = self._search_regex(rf'(<{name}[^>]+>)', string, name)
        return {
            attr_name[1:]: self._parse_json(
                raw_value, video_id, transform_source=unescapeHTML, fatal=False)
            for attr_name, raw_value in extract_attributes(opening_tag).items()
            if attr_name.startswith(':')
        }
35
36
class ZaikoIE(ZaikoBaseIE):
    """Extractor for Zaiko event stream pages (``/event/<id>/stream/...``)."""

    _VALID_URL = r'https?://(?:[\w-]+\.)?zaiko\.io/event/(?P<id>\d+)/stream(?:/\d+)+'
    _TESTS = [{
        'url': 'https://zaiko.io/event/324868/stream/20571/20571',
        'info_dict': {
            'id': '324868',
            'ext': 'mp4',
            'title': 'ZAIKO STREAMING TEST',
            'alt_title': '[VOD] ZAIKO STREAMING TEST_20210603(Do Not Delete)',
            'uploader_id': '454',
            'uploader': 'ZAIKO ZERO',
            'release_timestamp': 1583809200,
            'thumbnail': r're:^https://[\w.-]+/\w+/\w+',
            'thumbnails': 'maxcount:2',
            'release_date': '20200310',
            'categories': ['Tech House'],
            'live_status': 'was_live',
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'Your account does not have tickets to this event',
    }]

    def _real_extract(self, url):
        """Extract formats and metadata for a single Zaiko stream page.

        Downloads the stream page, follows its ``video_source`` URL to the
        player page, and reads the event status and HLS endpoint from the
        player's ``initial_event_info``. When no formats are found,
        ``raise_no_formats`` is called with a message chosen by event status.
        """
        video_id = self._match_id(url)

        webpage = self._download_real_webpage(url, video_id)
        stream_meta = self._parse_vue_element_attr('stream-page', webpage, video_id)

        # The player page URL is mandatory; it is requested with a zaiko.io referer
        player_page = self._download_webpage(
            stream_meta['stream-access']['video_source'], video_id,
            'Downloading player page', headers={'referer': 'https://zaiko.io/'})
        player_meta = self._parse_vue_element_attr('player', player_page, video_id)
        status = traverse_obj(player_meta, ('initial_event_info', 'status', {str}))
        # status -> (live_status, message used if no formats are found, whether that is expected)
        live_status, msg, expected = {
            'vod': ('was_live', 'No VOD stream URL was found', False),
            'archiving': ('post_live', 'Event VOD is still being processed', True),
            'deleting': ('post_live', 'This event has ended', True),
            'deleted': ('post_live', 'This event has ended', True),
            'error': ('post_live', 'This event has ended', True),
            'disconnected': ('post_live', 'Stream has been disconnected', True),
            'live_to_disconnected': ('post_live', 'Stream has been disconnected', True),
            'live': ('is_live', 'No livestream URL was found', False),
            'waiting': ('is_upcoming', 'Live event has not yet started', True),
            'cancelled': ('not_live', 'Event has been cancelled', True),
        }.get(status) or ('not_live', f'Unknown event status "{status}"', False)

        stream_url = traverse_obj(player_meta, ('initial_event_info', 'endpoint', {url_or_none}))
        formats = self._extract_m3u8_formats(
            stream_url, video_id, live=True, fatal=False) if stream_url else []
        if not formats:
            self.raise_no_formats(msg, expected=expected)

        # Candidate thumbnails: the player's poster and the event page's og:image.
        # The event page download is non-fatal; a failed download is searched as ''.
        thumbnail_urls = [
            traverse_obj(player_meta, ('initial_event_info', 'poster_url')),
            self._og_search_thumbnail(self._download_webpage(
                f'https://zaiko.io/event/{video_id}', video_id, 'Downloading event page', fatal=False) or ''),
        ]

        return {
            'id': video_id,
            'formats': formats,
            'live_status': live_status,
            **traverse_obj(stream_meta, {
                'title': ('event', 'name', {str}),
                'uploader': ('profile', 'name', {str}),
                'uploader_id': ('profile', 'id', {str_or_none}),
                'release_timestamp': ('stream', 'start', 'timestamp', {int_or_none}),
                'categories': ('event', 'genres', ..., {lambda x: x or None}),
            }),
            **traverse_obj(player_meta, ('initial_event_info', {
                'alt_title': ('title', {str}),
            })),
            # Only candidates that pass url_or_none are kept
            'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)],
        }
111
112
class ZaikoETicketIE(ZaikoBaseIE):
    """Extractor for Zaiko e-ticket pages; yields the ticket's streams as a playlist."""

    # The dot after "www" is escaped so that only a literal "www." prefix matches
    _VALID_URL = r'https?://(?:www\.)?zaiko\.io/account/eticket/(?P<id>[\w=-]{49})'
    _TESTS = [{
        'url': 'https://zaiko.io/account/eticket/TZjMwMzQ2Y2EzMXwyMDIzMDYwNzEyMTMyNXw1MDViOWU2Mw==',
        'playlist_count': 1,
        'info_dict': {
            'id': 'f30346ca31-20230607121325-505b9e63',
            'title': 'ZAIKO STREAMING TEST',
            'thumbnail': 'https://media.zkocdn.net/pf_1/1_3wdyjcjyupseatkwid34u',
        },
        'skip': 'Only available with the ticketholding account',
    }]

    def _real_extract(self, url):
        """Return a playlist of ZaikoIE entries for every stream URL on the e-ticket page."""
        ticket_id = self._match_id(url)
        # Everything after the first character of the URL ID is urlsafe-base64
        # decoded and '|' is replaced with '-'; the raw ID is kept when
        # try_call yields a falsy result.
        ticket_id = try_call(
            lambda: base64.urlsafe_b64decode(ticket_id[1:]).decode().replace('|', '-')) or ticket_id

        webpage = self._download_real_webpage(url, ticket_id)
        eticket = self._parse_vue_element_attr('eticket', webpage, ticket_id)

        return self.playlist_result(
            [self.url_result(stream, ZaikoIE) for stream in traverse_obj(eticket, ('streams', ..., 'url'))],
            ticket_id, **traverse_obj(eticket, ('ticket-details', {
                'title': 'event_name',
                'thumbnail': 'event_img_url',
            })))