2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
17 class IPrimaIE(InfoExtractor
):
18 _VALID_URL
= r
'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
20 _NETRC_MACHINE
= 'iprima'
21 _LOGIN_URL
= 'https://auth.iprima.cz/oauth2/login'
22 _TOKEN_URL
= 'https://auth.iprima.cz/oauth2/token'
26 'url': 'https://prima.iprima.cz/particka/92-epizoda',
30 'title': 'Partička (92)',
31 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
32 'upload_date': '20201103',
33 'timestamp': 1604437480,
36 'skip_download': True, # m3u8 download
39 'url': 'http://play.iprima.cz/particka/particka-92',
40 'only_matching': True,
43 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
44 'only_matching': True,
46 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
47 'only_matching': True,
49 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
50 'only_matching': True,
52 'url': 'http://www.iprima.cz/filmy/desne-rande',
53 'only_matching': True,
55 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
56 'only_matching': True,
58 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
59 'only_matching': True,
61 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
62 'only_matching': True,
64 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
65 'only_matching': True,
68 def _perform_login(self
, username
, password
):
72 login_page
= self
._download
_webpage
(
73 self
._LOGIN
_URL
, None, note
='Downloading login page',
74 errnote
='Downloading login page failed')
76 login_form
= self
._hidden
_inputs
(login_page
)
80 '_password': password
})
82 _
, login_handle
= self
._download
_webpage
_handle
(
83 self
._LOGIN
_URL
, None, data
=urlencode_postdata(login_form
),
86 code
= parse_qs(login_handle
.geturl()).get('code')[0]
88 raise ExtractorError('Login failed', expected
=True)
90 token_request_data
= {
91 'scope': 'openid+email+profile+phone+address+offline_access',
92 'client_id': 'prima_sso',
93 'grant_type': 'authorization_code',
95 'redirect_uri': 'https://auth.iprima.cz/sso/auth-check'}
97 token_data
= self
._download
_json
(
98 self
._TOKEN
_URL
, None,
99 note
='Downloading token', errnote
='Downloading token failed',
100 data
=urlencode_postdata(token_request_data
))
102 self
.access_token
= token_data
.get('access_token')
103 if self
.access_token
is None:
104 raise ExtractorError('Getting token failed', expected
=True)
106 def _real_initialize(self
):
107 if not self
.access_token
:
108 self
.raise_login_required('Login is required to access any iPrima content', method
='password')
110 def _raise_access_error(self
, error_code
):
111 if error_code
== 'PLAY_GEOIP_DENIED':
112 self
.raise_geo_restricted(countries
=['CZ'], metadata_available
=True)
113 elif error_code
is not None:
114 self
.raise_no_formats('Access to stream infos forbidden', expected
=True)
116 def _real_extract(self
, url
):
117 video_id
= self
._match
_id
(url
)
119 webpage
= self
._download
_webpage
(url
, video_id
)
121 title
= self
._html
_search
_meta
(
122 ['og:title', 'twitter:title'],
123 webpage
, 'title', default
=None)
125 video_id
= self
._search
_regex
((
126 r
'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
127 r'pproduct_id\s*=\s*([\'"])(?P
<id>p\d
+)\
1'),
128 webpage, 'real
id', group='id')
130 metadata = self._download_json(
131 f'https
://api
.play
-backend
.iprima
.cz
/api
/v1
//products
/id-{video_id}
/play
',
132 video_id, note='Getting manifest URLs
', errnote='Failed to get manifest URLs
',
133 headers={'X-OTT-Access-Token': self.access_token},
136 self._raise_access_error(metadata.get('errorCode
'))
138 stream_infos = metadata.get('streamInfos
')
140 if stream_infos is None:
141 self.raise_no_formats('Reading stream infos failed
', expected=True)
143 for manifest in stream_infos:
144 manifest_type = manifest.get('type')
145 manifest_url = manifest.get('url
')
146 ext = determine_ext(manifest_url)
147 if manifest_type == 'HLS
' or ext == 'm3u8
':
148 formats += self._extract_m3u8_formats(
149 manifest_url, video_id, 'mp4
', entry_protocol='m3u8_native
',
150 m3u8_id='hls
', fatal=False)
151 elif manifest_type == 'DASH
' or ext == 'mpd
':
152 formats += self._extract_mpd_formats(
153 manifest_url, video_id, mpd_id='dash
', fatal=False)
154 self._sort_formats(formats)
156 final_result = self._search_json_ld(webpage, video_id) or {}
157 final_result.update({
160 'thumbnail
': self._html_search_meta(
161 ['thumbnail
', 'og
:image
', 'twitter
:image
'],
162 webpage, 'thumbnail
', default=None),
164 'description
': self._html_search_meta(
165 ['description
', 'og
:description
', 'twitter
:description
'],
166 webpage, 'description
', default=None)})
171 class IPrimaCNNIE(InfoExtractor):
172 _VALID_URL = r'https?
://cnn\
.iprima\
.cz
/(?
:[^
/]+/)*(?P
<id>[^
/?
#&]+)'
176 'url': 'https://cnn.iprima.cz/porady/strunc/24072020-koronaviru-mam-plne-zuby-strasit-druhou-vlnou-je-absurdni-rika-senatorka-dernerova',
180 'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
183 'skip_download': 'm3u8'
187 def _real_extract(self
, url
):
188 video_id
= self
._match
_id
(url
)
190 self
._set
_cookie
('play.iprima.cz', 'ott_adult_confirmed', '1')
192 webpage
= self
._download
_webpage
(url
, video_id
)
194 title
= self
._og
_search
_title
(
195 webpage
, default
=None) or self
._search
_regex
(
196 r
'<h1>([^<]+)', webpage
, 'title')
198 video_id
= self
._search
_regex
(
199 (r
'<iframe[^>]+\bsrc=["\'](?
:https?
:)?
//(?
:api\
.play
-backend\
.iprima\
.cz
/prehravac
/embedded|prima\
.iprima\
.cz
/[^
/]+/[^
/]+)\?.*?
\bid
=(p\d
+)',
200 r'data
-product
="([^"]+)">',
201 r'id=["\']player
-(p\d
+)"',
202 r'playerId\s*:\s*["\']player
-(p\d
+)',
203 r'\bvideos\s
*=\s
*["\'](p\d+)'),
206 playerpage = self._download_webpage(
207 'http://play.iprima.cz/prehravac/init',
208 video_id, note='Downloading player', query={
210 '_ts': round(time.time()),
211 'productId': video_id,
212 }, headers={'Referer': url})
216 def extract_formats(format_url, format_key=None, lang=None):
217 ext = determine_ext(format_url)
219 if format_key == 'hls' or ext == 'm3u8':
220 new_formats = self._extract_m3u8_formats(
221 format_url, video_id, 'mp4', entry_protocol='m3u8_native',
222 m3u8_id='hls', fatal=False)
223 elif format_key == 'dash' or ext == 'mpd':
225 new_formats = self._extract_mpd_formats(
226 format_url, video_id, mpd_id='dash', fatal=False)
228 for f in new_formats:
229 if not f.get('language'):
231 formats.extend(new_formats)
233 options = self._parse_json(
235 r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
236 playerpage, 'player options', default='{}'),
237 video_id, transform_source=js_to_json, fatal=False)
239 for key, tracks in options.get('tracks', {}).items():
240 if not isinstance(tracks, list):
243 src = track.get('src')
245 extract_formats(src, key.lower(), track.get('lang'))
248 for _, src in re.findall(r'src["\']\s
*:\s
*(["\'])(.+?)\1', playerpage):
251 if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
252 self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
254 self._sort_formats(formats)
259 'thumbnail': self._og_search_thumbnail(webpage, default=None),
261 'description': self._og_search_description(webpage, default=None),