jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/wppilot.py
[panopto] Add extractors (#2908)
[yt-dlp.git] / yt_dlp / extractor / wppilot.py
CommitLineData
c0599d4f
LL
1# coding: utf-8
2
3from .common import InfoExtractor
4from ..utils import (
5 try_get,
6 ExtractorError,
7)
8
9import json
10import random
11import re
12
13
class WPPilotBaseIE(InfoExtractor):
    """Shared helpers for the WP Pilot (pilot.wp.pl) extractors."""

    # API endpoint templates; ``%s`` is replaced with the channel id.
    _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
    _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s'

    # Headers sent with every API request made by the subclasses.
    _HEADERS_WEB = {
        'Content-Type': 'application/json; charset=UTF-8',
        'Referer': 'https://pilot.wp.pl/tv/',
    }

    def _get_channel_list(self, cache=True):
        """Return a ``(channel_list, from_cache)`` tuple.

        When ``cache`` is ``True`` and a non-empty list is stored in the
        downloader cache, that list is returned with ``from_cache=True``.
        Otherwise the list is looked up in the site's page-data static
        queries, stored in the cache and returned with ``from_cache=False``.

        Raises ExtractorError if no static query contains a channel list.
        """
        if cache is True:
            stored_channels = self._downloader.cache.load('wppilot', 'channel-list')
            if stored_channels:
                return stored_channels, True

        tv_page = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage')
        # The script URL embeds the current build version of the frontend.
        build_root = self._search_regex(
            r'<script src="(https://wp-pilot-gatsby\.wpcdn\.pl/v[\d.-]+/desktop)',
            tv_page, 'gatsby build version')
        page_data_base_url = build_root + '/page-data'
        page_data = self._download_json(
            f'{page_data_base_url}/tv/page-data.json', None, 'Downloading page data')

        # Probe each static query in order; the first one exposing
        # data.allChannels.nodes is taken as the channel list.
        for qhash in page_data['staticQueryHashes']:
            query_result = self._download_json(
                f'{page_data_base_url}/sq/d/{qhash}.json', None,
                'Searching for channel list')
            found_channels = try_get(query_result, lambda x: x['data']['allChannels']['nodes'])
            if found_channels is not None:
                self._downloader.cache.store('wppilot', 'channel-list', found_channels)
                return found_channels, False

        raise ExtractorError('Unable to find the channel list')

    def _parse_channel(self, chan):
        """Convert one raw channel entry into an info dict for a live channel."""
        info = {
            'id': str(chan['id']),
            'title': chan['name'],
            'is_live': True,
        }
        # Only image fields that are present and non-empty become thumbnails.
        image_keys = ('thumbnail', 'thumbnail_mobile', 'icon')
        info['thumbnails'] = [
            {'id': image_key, 'url': chan[image_key]}
            for image_key in image_keys
            if chan.get(image_key)
        ]
        return info
54
55
class WPPilotIE(WPPilotBaseIE):
    """Extractor for a single WP Pilot live channel.

    Accepts ``https://pilot.wp.pl/tv/#<slug>`` URLs as well as the internal
    ``wppilot:<id or slug>`` form.
    """

    _VALID_URL = r'(?:https?://pilot\.wp\.pl/tv/?#|wppilot:)(?P<id>[a-z\d-]+)'
    IE_NAME = 'wppilot'

    _TESTS = [{
        'url': 'https://pilot.wp.pl/tv/#telewizja-wp-hd',
        'info_dict': {
            'id': '158',
            'ext': 'mp4',
            'title': 'Telewizja WP HD',
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {
        # audio only
        'url': 'https://pilot.wp.pl/tv/#radio-nowy-swiat',
        'info_dict': {
            'id': '238',
            'ext': 'm4a',
            'title': 'Radio Nowy Świat',
        },
        'params': {
            'format': 'bestaudio',
        },
    }, {
        'url': 'wppilot:9',
        'only_matching': True,
    }]

    @staticmethod
    def _find_channel(channel_list, key, wanted):
        """Return the first raw channel whose ``key`` equals ``wanted``, else None."""
        for chan in channel_list:
            value = chan.get(key)
            # ``wanted`` always comes from the URL and is a string, while the
            # listing may carry the id as a number, so compare as strings.
            if value is not None and str(value) == wanted:
                return chan
        return None

    def _get_channel(self, id_or_slug):
        """Look up a channel by numeric id or slug and return its info dict.

        The cached channel list is consulted first; if the channel is missing
        from a cached list, the list is downloaded again and searched once
        more.

        Raises ExtractorError if the channel cannot be found.
        """
        channel_list, is_cached = self._get_channel_list(cache=True)
        key = 'id' if re.match(r'^\d+$', id_or_slug) else 'slug'

        chan = self._find_channel(channel_list, key, id_or_slug)
        # if cached channel not found, download and retry
        if chan is None and is_cached:
            channel_list, _ = self._get_channel_list(cache=False)
            chan = self._find_channel(channel_list, key, id_or_slug)

        if chan is None:
            raise ExtractorError('Channel not found')
        return self._parse_channel(chan)

    def _real_extract(self, url):
        """Extract the HLS formats of the live channel referenced by ``url``.

        Returns the channel info dict with ``formats`` filled in, or a URL
        result pointing back at this extractor after a previous stream
        session had to be invalidated.

        Raises ExtractorError if the channel is unknown or the API response
        does not contain a stream list.
        """
        video_id = self._match_id(url)

        channel = self._get_channel(video_id)
        # The URL may have carried a slug; the API is addressed by channel id.
        video_id = str(channel['id'])

        session_cookie = next(
            (c for c in self._downloader.cookiejar if c.name == 'netviapisessid'), None)
        # cookies starting with "g:" are assigned to guests
        is_authorized = (
            session_cookie is not None
            and not session_cookie.value.startswith('g:'))

        # HTTP 422 is accepted so that the error payload can be inspected below.
        video = self._download_json(
            (self._VIDEO_URL if is_authorized else self._VIDEO_GUEST_URL) % video_id,
            video_id, query={
                'device_type': 'web',
            }, headers=self._HEADERS_WEB,
            expected_status=(200, 422))

        # An error carrying a stream token refers to an earlier stream
        # session; close that session and restart the extraction.
        stream_token = try_get(video, lambda x: x['_meta']['error']['info']['stream_token'])
        if stream_token:
            close = self._download_json(
                'https://pilot.wp.pl/api/v1/channels/close', video_id,
                'Invalidating previous stream session', headers=self._HEADERS_WEB,
                data=json.dumps({
                    'channelId': video_id,
                    't': stream_token,
                }).encode('utf-8'))
            if try_get(close, lambda x: x['data']['status']) == 'ok':
                return self.url_result(url, ie=WPPilotIE.ie_key())

        streams = try_get(video, lambda x: x['data']['stream_channel']['streams'], list)
        if streams is None:
            # Error payloads (or a failed session close) carry no stream list;
            # fail with a readable message instead of a bare KeyError.
            raise ExtractorError('Unable to find any streams in the API response')

        formats = []
        for fmt in streams:
            # live DASH ('dash@live:abr') does not work for now, so only the
            # HLS variant is extracted.
            if fmt.get('type') == 'hls@live:abr':
                # 'url' holds several alternative URLs; pick one at random.
                formats.extend(
                    self._extract_m3u8_formats(
                        random.choice(fmt['url']),
                        video_id, live=True))

        self._sort_formats(formats)

        channel['formats'] = formats
        return channel
147
148
class WPPilotChannelsIE(WPPilotBaseIE):
    """Playlist extractor yielding every channel from the WP Pilot channel list."""

    _VALID_URL = r'(?:https?://pilot\.wp\.pl/(?:tv/?)?(?:\?[^#]*)?#?|wppilot:)$'
    IE_NAME = 'wppilot:channels'

    _TESTS = [{
        'url': 'wppilot:',
        'info_dict': {
            'id': 'wppilot',
            'title': 'WP Pilot',
        },
        'playlist_mincount': 100,
    }, {
        'url': 'https://pilot.wp.pl/',
        'only_matching': True,
    }]

    def _entries(self):
        """Yield one ``url_transparent`` entry per channel, handled by WPPilotIE."""
        channels, _from_cache = self._get_channel_list()
        for chan in channels:
            # Start from the parsed metadata and add the routing fields.
            yield {
                **self._parse_channel(chan),
                '_type': 'url_transparent',
                'url': f'wppilot:{chan["id"]}',
                'ie_key': WPPilotIE.ie_key(),
            }

    def _real_extract(self, url):
        """Return the whole channel list as a playlist; ``url`` is not inspected."""
        entries = self._entries()
        return self.playlist_result(entries, 'wppilot', 'WP Pilot')