# jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/wppilot.py
3 from .common
import InfoExtractor
class WPPilotBaseIE(InfoExtractor):
    # Shared constants for the WP Pilot extractors.

    # Stream-info endpoint templates; '%s' is filled with the channel id.
    # The guest variant is used when no authorized session cookie is present.
    _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
    _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s'

    # Headers sent with the API requests.
    # NOTE(review): the line opening this mapping was not present in the
    # extracted text; the name is taken from the `self._HEADERS_WEB`
    # references in WPPilotIE._real_extract — confirm against the real file.
    _HEADERS_WEB = {
        'Content-Type': 'application/json; charset=UTF-8',
        'Referer': 'https://pilot.wp.pl/tv/',
    }
def _get_channel_list(self, cache=True):
    """Return the WP Pilot channel list and whether it came from the cache.

    NOTE(review): method of WPPilotBaseIE — re-indent under the class once
    the file's indentation is restored.

    :param cache: when true, try the on-disk cache ('wppilot' /
        'channel-list') before hitting the network.
    :return: ``(channel_list, from_cache)`` tuple; ``from_cache`` is True
        only when the list was served from the cache.
    :raises ExtractorError: when none of the static-query documents
        contains a channel list.
    """
    # NOTE(review): the two guard lines below were missing from the
    # extracted text and are reconstructed from the `cache` parameter and
    # the `return cache_res, True` that follows — confirm the exact
    # conditions against the real file.
    if cache:
        cache_res = self._downloader.cache.load('wppilot', 'channel-list')
        if cache_res:
            return cache_res, True

    webpage = self._download_webpage(
        'https://pilot.wp.pl/tv/', None, 'Downloading webpage')
    # The script URL embeds the site's build version; the page-data JSON
    # documents live under the same versioned prefix.
    page_data_base_url = self._search_regex(
        r'<script src="(https://wp-pilot-gatsby\.wpcdn\.pl/v[\d.-]+/desktop)',
        webpage, 'gatsby build version') + '/page-data'
    page_data = self._download_json(
        f'{page_data_base_url}/tv/page-data.json', None, 'Downloading page data')

    # Probe each static-query document until one carries the channel nodes.
    for qhash in page_data['staticQueryHashes']:
        qhash_content = self._download_json(
            f'{page_data_base_url}/sq/d/{qhash}.json', None,
            'Searching for channel list')
        channel_list = try_get(
            qhash_content, lambda x: x['data']['allChannels']['nodes'])
        if channel_list is None:
            # NOTE(review): the statement under this `if` was missing from
            # the extracted text; `continue` is the only reading under
            # which the store/return below make sense — confirm.
            continue
        self._downloader.cache.store('wppilot', 'channel-list', channel_list)
        return channel_list, False

    raise ExtractorError('Unable to find the channel list')
# Build a result mapping from one raw channel record `chan`: the id is
# stringified, the title is taken from chan['name'], and one item is produced
# for each of the three image keys that has a truthy value in `chan`.
# NOTE(review): the extracted text skips embedded lines 45, 48-51 and 53, so
# the opening of the returned mapping, the head of the comprehension and the
# closing bracket are not visible here — restore them from the real file
# before editing this method.
44 def _parse_channel(self
, chan
):
46 'id': str(chan
['id']),
47 'title': chan
['name'],
# Tail of a comprehension over the image keys; keys that are absent or falsy
# in `chan` are filtered out by the trailing `if chan.get(key)`.
52 } for key
in ('thumbnail', 'thumbnail_mobile', 'icon') if chan
.get(key
)],
# Extractor for a single WP Pilot channel.
56 class WPPilotIE(WPPilotBaseIE
):
# Accepts `http(s)://pilot.wp.pl/tv#<id>` (optional slash before '#') or the
# internal `wppilot:<id>` form; <id> is lowercase letters, digits and hyphens,
# so it can be either a numeric id or a slug.
57 _VALID_URL
= r
'(?:https?://pilot\.wp\.pl/tv/?#|wppilot:)(?P<id>[a-z\d-]+)'
# NOTE(review): what follows are surviving fragments of the test-case list;
# the extracted text skips embedded lines 58-60, 62-64, 66-67, 69-71, 73-75,
# 77-78, 80-82 and 84, so the surrounding structure and the remaining fields
# are not visible here.
61 'url': 'https://pilot.wp.pl/tv/#telewizja-wp-hd',
65 'title': 'Telewizja WP HD',
68 'format': 'bestvideo',
72 'url': 'https://pilot.wp.pl/tv/#radio-nowy-swiat',
76 'title': 'Radio Nowy Świat',
79 'format': 'bestaudio',
83 'only_matching': True,
86 def _get_channel(self
, id_or_slug
):
87 video_list
, is_cached
= self
._get
_channel
_list
(cache
=True)
88 key
= 'id' if re
.match(r
'^\d+$', id_or_slug
) else 'slug'
89 for video
in video_list
:
90 if video
.get(key
) == id_or_slug
:
91 return self
._parse
_channel
(video
)
92 # if cached channel not found, download and retry
94 video_list
, _
= self
._get
_channel
_list
(cache
=False)
95 for video
in video_list
:
96 if video
.get(key
) == id_or_slug
:
97 return self
._parse
_channel
(video
)
98 raise ExtractorError('Channel not found')
# Extract one channel: resolve the channel record from the URL id, choose the
# authorized or guest endpoint from the session cookie, request the stream
# description, and collect the HLS formats into the channel mapping.
# NOTE(review): the extracted text skips embedded lines 112, 118, 122, 124-125,
# 128-130, 134, 138, 142, 144 and 146-148, so parts of the request arguments,
# the condition around the "close" request, the creation of `formats` and the
# final return are not visible here.
100 def _real_extract(self
, url
):
101 video_id
= self
._match
_id
(url
)
# The URL may carry a slug; from here on `video_id` is the channel's id as a
# string.
103 channel
= self
._get
_channel
(video_id
)
104 video_id
= str(channel
['id'])
# First the matching cookie object (or None) is looked up, then it is reduced
# to a boolean.
106 is_authorized
= next((c
for c
in self
._downloader
.cookiejar
if c
.name
== 'netviapisessid'), None)
107 # cookies starting with "g:" are assigned to guests
108 is_authorized
= True if is_authorized
is not None and not is_authorized
.value
.startswith('g:') else False
# Both HTTP 200 and 422 responses are accepted and parsed as JSON.
# NOTE(review): presumably the 422 body is the one carrying the
# `_meta.error.info.stream_token` read below — confirm.
110 video
= self
._download
_json
(
111 (self
._VIDEO
_URL
if is_authorized
else self
._VIDEO
_GUEST
_URL
) % video_id
,
113 'device_type': 'web',
114 }, headers
=self
._HEADERS
_WEB
,
115 expected_status
=(200, 422))
117 stream_token
= try_get(video
, lambda x
: x
['_meta']['error']['info']['stream_token'])
# Ask the API to close the previous stream session for this channel.
119 close
= self
._download
_json
(
120 'https://pilot.wp.pl/api/v1/channels/close', video_id
,
121 'Invalidating previous stream session', headers
=self
._HEADERS
_WEB
,
123 'channelId': video_id
,
# When the close request reports 'ok', the same URL is handed back to this
# extractor so extraction starts over.
126 if try_get(close
, lambda x
: x
['data']['status']) == 'ok':
127 return self
.url_result(url
, ie
=WPPilotIE
.ie_key())
# Only streams typed 'hls@live:abr' are turned into formats; one URL is picked
# at random from fmt['url'].
# NOTE(review): assumes fmt['url'] is a non-empty sequence of URLs — confirm.
131 for fmt
in video
['data']['stream_channel']['streams']:
132 # live DASH does not work for now
133 # if fmt['type'] == 'dash@live:abr':
135 # self._extract_mpd_formats(
136 # random.choice(fmt['url']), video_id))
137 if fmt
['type'] == 'hls@live:abr':
139 self
._extract
_m
3u8_formats
(
140 random
.choice(fmt
['url']),
141 video_id
, live
=True))
143 self
._sort
_formats
(formats
)
# The formats are attached to the mapping produced by _get_channel.
145 channel
['formats'] = formats
# Extractor that lists all WP Pilot channels as a playlist.
149 class WPPilotChannelsIE(WPPilotBaseIE
):
# Matches the site root or /tv (optional query string, optional bare '#') and
# the bare `wppilot:` form; the trailing `$` means nothing may follow, so URLs
# that carry a channel id after '#' or ':' do not match this pattern.
150 _VALID_URL
= r
'(?:https?://pilot\.wp\.pl/(?:tv/?)?(?:\?[^#]*)?#?|wppilot:)$'
151 IE_NAME
= 'wppilot:channels'
# NOTE(review): what follows are surviving fragments of the test-case list;
# the extracted text skips embedded lines 152-158, 160 and 163-165, so the
# surrounding structure and the remaining fields are not visible here.
159 'playlist_mincount': 100,
161 'url': 'https://pilot.wp.pl/',
162 'only_matching': True,
# Body of the playlist-entry producer: fetch the channel list (cache allowed
# by default), parse every channel, and describe each one as a
# 'url_transparent' result pointing at `wppilot:<id>` and handled by WPPilotIE.
# NOTE(review): the `def` line (embedded line 165) is missing from the
# extracted text; the name `_entries` is inferred from the `self._entries()`
# call in _real_extract. Embedded lines 169 and 173-175 are also missing, so
# how the mapping below is combined with `entry` and how entries are handed
# back is not visible here.
166 channel_list
, _
= self
._get
_channel
_list
()
167 for chan
in channel_list
:
168 entry
= self
._parse
_channel
(chan
)
170 '_type': 'url_transparent',
171 'url': f
'wppilot:{chan["id"]}',
172 'ie_key': WPPilotIE
.ie_key(),
176 def _real_extract(self
, url
):
177 return self
.playlist_result(self
._entries
(), 'wppilot', 'WP Pilot')