4 from .common
import InfoExtractor
15 class IPrimaIE(InfoExtractor
):
16 _VALID_URL
= r
'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
18 _NETRC_MACHINE
= 'iprima'
19 _AUTH_ROOT
= 'https://auth.iprima.cz'
23 'url': 'https://prima.iprima.cz/particka/92-epizoda',
27 'title': 'Partička (92)',
28 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
29 'upload_date': '20201103',
30 'timestamp': 1604437480,
33 'skip_download': True, # m3u8 download
36 'url': 'http://play.iprima.cz/particka/particka-92',
37 'only_matching': True,
40 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
41 'only_matching': True,
43 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
44 'only_matching': True,
46 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
47 'only_matching': True,
49 'url': 'http://www.iprima.cz/filmy/desne-rande',
50 'only_matching': True,
52 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
53 'only_matching': True,
55 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
56 'only_matching': True,
58 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
59 'only_matching': True,
61 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
62 'only_matching': True,
65 def _perform_login(self
, username
, password
):
69 login_page
= self
._download
_webpage
(
70 f
'{self._AUTH_ROOT}/oauth2/login', None, note
='Downloading login page',
71 errnote
='Downloading login page failed')
73 login_form
= self
._hidden
_inputs
(login_page
)
77 '_password': password
})
79 profile_select_html
, login_handle
= self
._download
_webpage
_handle
(
80 f
'{self._AUTH_ROOT}/oauth2/login', None, data
=urlencode_postdata(login_form
),
83 # a profile may need to be selected first, even when there is only a single one
84 if '/profile-select' in login_handle
.url
:
85 profile_id
= self
._search
_regex
(
86 r
'data-identifier\s*=\s*["\']?
(\w
+)', profile_select_html, 'profile
id')
88 login_handle = self._request_webpage(
89 f'{self._AUTH_ROOT}
/user
/profile
-select
-perform
/{profile_id}
', None,
90 query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile
')
92 code = traverse_obj(login_handle.url, ({parse_qs}, 'code
', 0))
94 raise ExtractorError('Login failed
', expected=True)
96 token_request_data = {
97 'scope
': 'openid
+email
+profile
+phone
+address
+offline_access
',
98 'client_id
': 'prima_sso
',
99 'grant_type
': 'authorization_code
',
101 'redirect_uri
': f'{self._AUTH_ROOT}
/sso
/auth
-check
'}
103 token_data = self._download_json(
104 f'{self._AUTH_ROOT}
/oauth2
/token
', None,
105 note='Downloading token
', errnote='Downloading token failed
',
106 data=urlencode_postdata(token_request_data))
108 self.access_token = token_data.get('access_token
')
109 if self.access_token is None:
110 raise ExtractorError('Getting token failed
', expected=True)
def _real_initialize(self):
    """Refuse to proceed when no access token is available.

    If ``self.access_token`` is missing or falsy, this calls
    ``self.raise_login_required`` with ``method='password'``.
    Otherwise it does nothing.
    """
    # The token is assigned at the end of _perform_login; fall back to None
    # so a login that never ran is reported as "login required" instead of
    # surfacing as an AttributeError.
    if getattr(self, 'access_token', None):
        return
    self.raise_login_required(
        'Login is required to access any iPrima content', method='password')
def _raise_access_error(self, error_code):
    """Report an access failure for the given API error code.

    :param error_code: the ``errorCode`` value from the play-metadata
        response, or ``None`` when the response carries no error.

    ``None`` is a no-op. ``'PLAY_GEOIP_DENIED'`` is reported through
    ``self.raise_geo_restricted`` for country ``CZ``. Any other value is
    reported through ``self.raise_no_formats`` as an expected error.
    """
    if error_code is None:
        return
    if error_code == 'PLAY_GEOIP_DENIED':
        self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
        return
    self.raise_no_formats('Access to stream infos forbidden', expected=True)
122 def _real_extract(self, url):
123 video_id = self._match_id(url)
125 webpage = self._download_webpage(url, video_id)
127 title = self._html_extract_title(webpage) or self._html_search_meta(
128 ['og
:title
', 'twitter
:title
'],
129 webpage, 'title
', default=None)
131 video_id = self._search_regex((
132 r'productId\s
*=\s
*([\'"])(?P<id>p\d+)\1',
133 r'pproduct_id\s*=\s*([\'"])(?P
<id>p\d
+)\
1',
134 ), webpage, 'real
id', group='id', default=None)
137 nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data
', fatal=False)
138 video_id = traverse_obj(
139 nuxt_data, (..., 'content
', 'additionals
', 'videoPlayId
', {str}), get_all=False)
142 nuxt_data = self._search_json(
143 r'<script
[^
>]+\bid
=["\']__NUXT_DATA__["\'][^
>]*>',
144 webpage, 'nuxt data
', None, end_pattern=r'</script
>', contains_pattern=r'\
[(?s
:.+)\
]')
146 video_id = traverse_obj(nuxt_data, lambda _, v: re.fullmatch(r'p\d
+', v), get_all=False)
149 self.raise_no_formats('Unable to extract video ID
from webpage
')
151 metadata = self._download_json(
152 f'https
://api
.play
-backend
.iprima
.cz
/api
/v1
//products
/id-{video_id}
/play
',
153 video_id, note='Getting manifest URLs
', errnote='Failed to get manifest URLs
',
154 headers={'X-OTT-Access-Token': self.access_token},
157 self._raise_access_error(metadata.get('errorCode
'))
159 stream_infos = metadata.get('streamInfos
')
161 if stream_infos is None:
162 self.raise_no_formats('Reading stream infos failed
', expected=True)
164 for manifest in stream_infos:
165 manifest_type = manifest.get('type')
166 manifest_url = manifest.get('url
')
167 ext = determine_ext(manifest_url)
168 if manifest_type == 'HLS
' or ext == 'm3u8
':
169 formats += self._extract_m3u8_formats(
170 manifest_url, video_id, 'mp4
', entry_protocol='m3u8_native
',
171 m3u8_id='hls
', fatal=False)
172 elif manifest_type == 'DASH
' or ext == 'mpd
':
173 formats += self._extract_mpd_formats(
174 manifest_url, video_id, mpd_id='dash
', fatal=False)
176 final_result = self._search_json_ld(webpage, video_id, default={})
177 final_result.update({
180 'thumbnail
': self._html_search_meta(
181 ['thumbnail
', 'og
:image
', 'twitter
:image
'],
182 webpage, 'thumbnail
', default=None),
184 'description
': self._html_search_meta(
185 ['description
', 'og
:description
', 'twitter
:description
'],
186 webpage, 'description
', default=None)})
191 class IPrimaCNNIE(InfoExtractor):
192 _VALID_URL = r'https?
://cnn\
.iprima\
.cz
/(?
:[^
/]+/)*(?P
<id>[^
/?
#&]+)'
196 'url': 'https://cnn.iprima.cz/porady/strunc/24072020-koronaviru-mam-plne-zuby-strasit-druhou-vlnou-je-absurdni-rika-senatorka-dernerova',
200 'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
203 'skip_download': 'm3u8',
207 def _real_extract(self
, url
):
208 video_id
= self
._match
_id
(url
)
210 self
._set
_cookie
('play.iprima.cz', 'ott_adult_confirmed', '1')
212 webpage
= self
._download
_webpage
(url
, video_id
)
214 title
= self
._og
_search
_title
(
215 webpage
, default
=None) or self
._search
_regex
(
216 r
'<h1>([^<]+)', webpage
, 'title')
218 video_id
= self
._search
_regex
(
219 (r
'<iframe[^>]+\bsrc=["\'](?
:https?
:)?
//(?
:api\
.play
-backend\
.iprima\
.cz
/prehravac
/embedded|prima\
.iprima\
.cz
/[^
/]+/[^
/]+)\?.*?
\bid
=(p\d
+)',
220 r'data
-product
="([^"]+)">',
221 r'id=["\']player
-(p\d
+)"',
222 r'playerId\s*:\s*["\']player
-(p\d
+)',
223 r'\bvideos\s
*=\s
*["\'](p\d+)'),
226 playerpage = self._download_webpage(
227 'http://play.iprima.cz/prehravac/init',
228 video_id, note='Downloading player', query={
230 '_ts': round(time.time()),
231 'productId': video_id,
232 }, headers={'Referer': url})
236 def extract_formats(format_url, format_key=None, lang=None):
237 ext = determine_ext(format_url)
239 if format_key == 'hls' or ext == 'm3u8':
240 new_formats = self._extract_m3u8_formats(
241 format_url, video_id, 'mp4', entry_protocol='m3u8_native',
242 m3u8_id='hls', fatal=False)
243 elif format_key == 'dash' or ext == 'mpd':
245 new_formats = self._extract_mpd_formats(
246 format_url, video_id, mpd_id='dash', fatal=False)
248 for f in new_formats:
249 if not f.get('language'):
251 formats.extend(new_formats)
253 options = self._parse_json(
255 r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
256 playerpage, 'player options', default='{}'),
257 video_id, transform_source=js_to_json, fatal=False)
259 for key, tracks in options.get('tracks', {}).items():
260 if not isinstance(tracks, list):
263 src = track.get('src')
265 extract_formats(src, key.lower(), track.get('lang'))
268 for _, src in re.findall(r'src["\']\s
*:\s
*(["\'])(.+?)\1', playerpage):
271 if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
272 self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
277 'thumbnail': self._og_search_thumbnail(webpage, default=None),
279 'description': self._og_search_description(webpage, default=None),