4 from .common
import InfoExtractor
15 class IPrimaIE(InfoExtractor
):
16 _VALID_URL
= r
'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
18 _NETRC_MACHINE
= 'iprima'
19 _AUTH_ROOT
= 'https://auth.iprima.cz'
23 'url': 'https://prima.iprima.cz/particka/92-epizoda',
27 'title': 'Partička (92)',
28 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
29 'upload_date': '20201103',
30 'timestamp': 1604437480,
33 'skip_download': True, # m3u8 download
36 'url': 'http://play.iprima.cz/particka/particka-92',
37 'only_matching': True,
40 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
41 'only_matching': True,
43 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
44 'only_matching': True,
46 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
47 'only_matching': True,
49 'url': 'http://www.iprima.cz/filmy/desne-rande',
50 'only_matching': True,
52 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
53 'only_matching': True,
55 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
56 'only_matching': True,
58 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
59 'only_matching': True,
61 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
62 'only_matching': True,
65 def _perform_login(self
, username
, password
):
69 login_page
= self
._download
_webpage
(
70 f
'{self._AUTH_ROOT}/oauth2/login', None, note
='Downloading login page',
71 errnote
='Downloading login page failed')
73 login_form
= self
._hidden
_inputs
(login_page
)
77 '_password': password
})
79 profile_select_html
, login_handle
= self
._download
_webpage
_handle
(
80 f
'{self._AUTH_ROOT}/oauth2/login', None, data
=urlencode_postdata(login_form
),
83 # a profile may need to be selected first, even when there is only a single one
84 if '/profile-select' in login_handle
.url
:
85 profile_id
= self
._search
_regex
(
86 r
'data-identifier\s*=\s*["\']?
(\w
+)', profile_select_html, 'profile
id')
88 login_handle = self._request_webpage(
89 f'{self._AUTH_ROOT}
/user
/profile
-select
-perform
/{profile_id}
', None,
90 query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile
')
92 code = traverse_obj(login_handle.url, ({parse_qs}, 'code
', 0))
94 raise ExtractorError('Login failed
', expected=True)
96 token_request_data = {
97 'scope
': 'openid
+email
+profile
+phone
+address
+offline_access
',
98 'client_id
': 'prima_sso
',
99 'grant_type
': 'authorization_code
',
101 'redirect_uri
': f'{self._AUTH_ROOT}
/sso
/auth
-check
'}
103 token_data = self._download_json(
104 f'{self._AUTH_ROOT}
/oauth2
/token
', None,
105 note='Downloading token
', errnote='Downloading token failed
',
106 data=urlencode_postdata(token_request_data))
108 self.access_token = token_data.get('access_token
')
109 if self.access_token is None:
110 raise ExtractorError('Getting token failed
', expected=True)
112 def _real_initialize(self):
113 if not self.access_token:
114 self.raise_login_required('Login
is required to access any iPrima content
', method='password
')
116 def _raise_access_error(self, error_code):
117 if error_code == 'PLAY_GEOIP_DENIED
':
118 self.raise_geo_restricted(countries=['CZ
'], metadata_available=True)
119 elif error_code is not None:
120 self.raise_no_formats('Access to stream infos forbidden
', expected=True)
122 def _real_extract(self, url):
123 video_id = self._match_id(url)
125 webpage = self._download_webpage(url, video_id)
127 title = self._html_extract_title(webpage) or self._html_search_meta(
128 ['og
:title
', 'twitter
:title
'],
129 webpage, 'title
', default=None)
131 video_id = self._search_regex((
132 r'productId\s
*=\s
*([\'"])(?P<id>p\d+)\1',
133 r'pproduct_id\s*=\s*([\'"])(?P
<id>p\d
+)\
1',
134 ), webpage, 'real
id', group='id', default=None)
137 nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data
')
138 video_id = traverse_obj(
139 nuxt_data, (..., 'content
', 'additionals
', 'videoPlayId
', {str}), get_all=False)
142 self.raise_no_formats('Unable to extract video ID
from webpage
')
144 metadata = self._download_json(
145 f'https
://api
.play
-backend
.iprima
.cz
/api
/v1
//products
/id-{video_id}
/play
',
146 video_id, note='Getting manifest URLs
', errnote='Failed to get manifest URLs
',
147 headers={'X-OTT-Access-Token': self.access_token},
150 self._raise_access_error(metadata.get('errorCode
'))
152 stream_infos = metadata.get('streamInfos
')
154 if stream_infos is None:
155 self.raise_no_formats('Reading stream infos failed
', expected=True)
157 for manifest in stream_infos:
158 manifest_type = manifest.get('type')
159 manifest_url = manifest.get('url
')
160 ext = determine_ext(manifest_url)
161 if manifest_type == 'HLS
' or ext == 'm3u8
':
162 formats += self._extract_m3u8_formats(
163 manifest_url, video_id, 'mp4
', entry_protocol='m3u8_native
',
164 m3u8_id='hls
', fatal=False)
165 elif manifest_type == 'DASH
' or ext == 'mpd
':
166 formats += self._extract_mpd_formats(
167 manifest_url, video_id, mpd_id='dash
', fatal=False)
169 final_result = self._search_json_ld(webpage, video_id, default={})
170 final_result.update({
173 'thumbnail
': self._html_search_meta(
174 ['thumbnail
', 'og
:image
', 'twitter
:image
'],
175 webpage, 'thumbnail
', default=None),
177 'description
': self._html_search_meta(
178 ['description
', 'og
:description
', 'twitter
:description
'],
179 webpage, 'description
', default=None)})
184 class IPrimaCNNIE(InfoExtractor):
185 _VALID_URL = r'https?
://cnn\
.iprima\
.cz
/(?
:[^
/]+/)*(?P
<id>[^
/?
#&]+)'
189 'url': 'https://cnn.iprima.cz/porady/strunc/24072020-koronaviru-mam-plne-zuby-strasit-druhou-vlnou-je-absurdni-rika-senatorka-dernerova',
193 'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
196 'skip_download': 'm3u8'
200 def _real_extract(self
, url
):
201 video_id
= self
._match
_id
(url
)
203 self
._set
_cookie
('play.iprima.cz', 'ott_adult_confirmed', '1')
205 webpage
= self
._download
_webpage
(url
, video_id
)
207 title
= self
._og
_search
_title
(
208 webpage
, default
=None) or self
._search
_regex
(
209 r
'<h1>([^<]+)', webpage
, 'title')
211 video_id
= self
._search
_regex
(
212 (r
'<iframe[^>]+\bsrc=["\'](?
:https?
:)?
//(?
:api\
.play
-backend\
.iprima\
.cz
/prehravac
/embedded|prima\
.iprima\
.cz
/[^
/]+/[^
/]+)\?.*?
\bid
=(p\d
+)',
213 r'data
-product
="([^"]+)">',
214 r'id=["\']player
-(p\d
+)"',
215 r'playerId\s*:\s*["\']player
-(p\d
+)',
216 r'\bvideos\s
*=\s
*["\'](p\d+)'),
219 playerpage = self._download_webpage(
220 'http://play.iprima.cz/prehravac/init',
221 video_id, note='Downloading player', query={
223 '_ts': round(time.time()),
224 'productId': video_id,
225 }, headers={'Referer': url})
229 def extract_formats(format_url, format_key=None, lang=None):
230 ext = determine_ext(format_url)
232 if format_key == 'hls' or ext == 'm3u8':
233 new_formats = self._extract_m3u8_formats(
234 format_url, video_id, 'mp4', entry_protocol='m3u8_native',
235 m3u8_id='hls', fatal=False)
236 elif format_key == 'dash' or ext == 'mpd':
238 new_formats = self._extract_mpd_formats(
239 format_url, video_id, mpd_id='dash', fatal=False)
241 for f in new_formats:
242 if not f.get('language'):
244 formats.extend(new_formats)
246 options = self._parse_json(
248 r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
249 playerpage, 'player options', default='{}'),
250 video_id, transform_source=js_to_json, fatal=False)
252 for key, tracks in options.get('tracks', {}).items():
253 if not isinstance(tracks, list):
256 src = track.get('src')
258 extract_formats(src, key.lower(), track.get('lang'))
261 for _, src in re.findall(r'src["\']\s
*:\s
*(["\'])(.+?)\1', playerpage):
264 if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
265 self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
270 'thumbnail': self._og_search_thumbnail(webpage, default=None),
272 'description': self._og_search_description(webpage, default=None),