2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
# Extractor for iprima.cz pages. _VALID_URL accepts any "<host>.iprima.cz"
# URL whose host does not start with "cnn" (negative lookahead) and captures
# a path segment as the `id` group.
# NOTE(review): this listing is lossy - statements are split across lines,
# every fragment carries its original line number, and some original lines
# (e.g. 19, 23-25, 27-29) are absent. Confirm against the pristine file.
17 class IPrimaIE(InfoExtractor
):
18 _VALID_URL
= r
'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
# presumably the machine name used to look up stored credentials - TODO confirm
20 _NETRC_MACHINE
= 'iprima'
# OAuth2 endpoints on auth.iprima.cz: the login form and the token exchange.
21 _LOGIN_URL
= 'https://auth.iprima.cz/oauth2/login'
22 _TOKEN_URL
= 'https://auth.iprima.cz/oauth2/token'
# Test-case data follows. The enclosing list/dict delimiters are not part of
# this listing; the first entry carries expected metadata, the remaining
# entries are URL-pattern checks only ('only_matching': True).
26 'url': 'https://prima.iprima.cz/particka/92-epizoda',
30 'title': 'Partička (92)',
31 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
32 'upload_date': '20201103',
33 'timestamp': 1604437480,
36 'skip_download': True, # m3u8 download
39 'url': 'http://play.iprima.cz/particka/particka-92',
40 'only_matching': True,
43 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
44 'only_matching': True,
46 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
47 'only_matching': True,
49 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
50 'only_matching': True,
52 'url': 'http://www.iprima.cz/filmy/desne-rande',
53 'only_matching': True,
55 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
56 'only_matching': True,
58 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
59 'only_matching': True,
61 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
62 'only_matching': True,
64 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
65 'only_matching': True,
# NOTE(review): this is presumably the body of the login routine; its `def`
# line is not present in this lossy listing, and original lines 79-81, 86-87,
# 89 and 96 are absent as well. Confirm against the pristine file.
# Fetch the configured credentials.
69 username
, password
= self
._get
_login
_info
()
# Both a username and a password are mandatory.
71 if username
is None or password
is None:
72 self
.raise_login_required('Login is required to access any iPrima content', method
='password')
# Download the login page and collect the hidden inputs of its form.
74 login_page
= self
._download
_webpage
(
75 self
._LOGIN
_URL
, None, note
='Downloading login page',
76 errnote
='Downloading login page failed')
78 login_form
= self
._hidden
_inputs
(login_page
)
# Tail of a statement whose beginning is missing from the listing; presumably
# it adds the credentials to login_form - TODO confirm.
82 '_password': password
})
# Send the url-encoded form to the login URL and keep the response handle.
84 _
, login_handle
= self
._download
_webpage
_handle
(
85 self
._LOGIN
_URL
, None, data
=urlencode_postdata(login_form
),
# Read the `code` query parameter from the final URL of the login response.
# NOTE(review): if parse_qs returns a plain dict, `.get('code')` is None when
# the parameter is absent and `[0]` raises TypeError instead of the
# 'Login failed' ExtractorError - consider guarding the lookup.
88 code
= parse_qs(login_handle
.geturl()).get('code')[0]
# The condition guarding this raise (original line 89) is not in the listing.
90 raise ExtractorError('Login failed', expected
=True)
# Form fields for the authorization-code token request (line 96 is missing;
# presumably it carries the code obtained above - TODO confirm).
92 token_request_data
= {
93 'scope': 'openid+email+profile+phone+address+offline_access',
94 'client_id': 'prima_sso',
95 'grant_type': 'authorization_code',
97 'redirect_uri': 'https://auth.iprima.cz/sso/auth-check'}
# Post the request to the token URL and parse the JSON answer.
99 token_data
= self
._download
_json
(
100 self
._TOKEN
_URL
, None,
101 note
='Downloading token', errnote
='Downloading token failed',
102 data
=urlencode_postdata(token_request_data
))
# Store the access token on the instance; a response without one is an error.
104 self
.access_token
= token_data
.get('access_token')
105 if self
.access_token
is None:
106 raise ExtractorError('Getting token failed', expected
=True)
def _raise_access_error(self, error_code):
    """Report an access error code through the matching extractor helper.

    ``None`` means there is nothing to report.  ``'PLAY_GEOIP_DENIED'`` is
    reported via ``raise_geo_restricted`` for country ``'CZ'``; every other
    value is reported via ``raise_no_formats``.
    """
    # Guard clause: no error code, nothing to do.
    if error_code is None:
        return

    if error_code == 'PLAY_GEOIP_DENIED':
        self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
        return

    # Any remaining code is treated as a generic access refusal.
    self.raise_no_formats('Access to stream infos forbidden', expected=True)
# Initialisation hook: acts only while no access token is set on the instance.
# NOTE(review): the statement executed inside the guard (original line 116)
# is missing from this lossy listing - confirm against the pristine file.
114 def _real_initialize(self
):
115 if not self
.access_token
:
# Extract stream formats and metadata for one iPrima page.
# NOTE(review): lossy listing - original lines 136-137, 141, 144, 160-161,
# 165 and everything after 168 (including any return) are absent.
118 def _real_extract(self
, url
):
# Provisional id taken from the URL, used while downloading the page.
119 video_id
= self
._match
_id
(url
)
121 webpage
= self
._download
_webpage
(url
, video_id
)
# Title is looked up in the og:title / twitter:title meta tags (default=None).
123 title
= self
._html
_search
_meta
(
124 ['og:title', 'twitter:title'],
125 webpage
, 'title', default
=None)
# Replace the URL-derived id with the product id ("p" followed by digits)
# assigned to productId / pproduct_id in the page.
127 video_id
= self
._search
_regex
((
128 r
'productId\s*=\s*([\'"])(?P<id>p\d+)\1',
129 r'pproduct_id\s*=\s*([\'"])(?P
<id>p\d
+)\
1'),
130 webpage, 'real
id', group='id')
# Request the play metadata for that product, sending the stored access token
# in the X-OTT-Access-Token header.
# NOTE(review): the URL contains a double slash before "products" - confirm
# this is intentional. The end of this call (lines 136-137) is not listed.
132 metadata = self._download_json(
133 f'https
://api
.play
-backend
.iprima
.cz
/api
/v1
//products
/id-{video_id}
/play
',
134 video_id, note='Getting manifest URLs
', errnote='Failed to get manifest URLs
',
135 headers={'X-OTT-Access-Token': self.access_token},
# Hand the response's errorCode (None when absent) to the error reporter.
138 self._raise_access_error(metadata.get('errorCode
'))
140 stream_infos = metadata.get('streamInfos
')
# NOTE(review): `formats` is appended to below but its initialisation is not
# in the listing, and the link between this `if` and the following `for`
# (original line 144) is missing - confirm against the pristine file.
142 if stream_infos is None:
143 self.raise_no_formats('Reading stream infos failed
', expected=True)
# HLS/m3u8 entries go to _extract_m3u8_formats, DASH/mpd entries to
# _extract_mpd_formats; both are called with fatal=False.
145 for manifest in stream_infos:
146 manifest_type = manifest.get('type')
147 manifest_url = manifest.get('url
')
148 ext = determine_ext(manifest_url)
149 if manifest_type == 'HLS
' or ext == 'm3u8
':
150 formats += self._extract_m3u8_formats(
151 manifest_url, video_id, 'mp4
', entry_protocol='m3u8_native
',
152 m3u8_id='hls
', fatal=False)
153 elif manifest_type == 'DASH
' or ext == 'mpd
':
154 formats += self._extract_mpd_formats(
155 manifest_url, video_id, mpd_id='dash
', fatal=False)
156 self._sort_formats(formats)
# Start from the page's JSON-LD data (or an empty dict) and overlay values
# taken from the page meta tags.
158 final_result = self._search_json_ld(webpage, video_id) or {}
159 final_result.update({
162 'thumbnail
': self._html_search_meta(
163 ['thumbnail
', 'og
:image
', 'twitter
:image
'],
164 webpage, 'thumbnail
', default=None),
166 'description
': self._html_search_meta(
167 ['description
', 'og
:description
', 'twitter
:description
'],
168 webpage, 'description
', default=None)})
# Extractor for pages on cnn.iprima.cz; _VALID_URL captures a path segment
# as the `id` group.
# NOTE(review): lossy listing - many original lines (e.g. 175-177, 206-207,
# 211, 215-217, 243-244, 246, 248-249, 251-252, 257-260, 262, 264+) are
# absent, including the end of the result this method builds.
173 class IPrimaCNNIE(InfoExtractor):
174 _VALID_URL = r'https?
://cnn\
.iprima\
.cz
/(?
:[^
/]+/)*(?P
<id>[^
/?
#&]+)'
# Test-case data (enclosing delimiters are not part of this listing).
178 'url': 'https://cnn.iprima.cz/porady/strunc/24072020-koronaviru-mam-plne-zuby-strasit-druhou-vlnou-je-absurdni-rika-senatorka-dernerova',
182 'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e',
185 'skip_download': 'm3u8'
189 def _real_extract(self
, url
):
190 video_id
= self
._match
_id
(url
)
# Set the 'ott_adult_confirmed' cookie for play.iprima.cz before any request.
192 self
._set
_cookie
('play.iprima.cz', 'ott_adult_confirmed', '1')
194 webpage
= self
._download
_webpage
(url
, video_id
)
# Title: Open Graph title, falling back to the text of the first <h1>.
196 title
= self
._og
_search
_title
(
197 webpage
, default
=None) or self
._search
_regex
(
198 r
'<h1>([^<]+)', webpage
, 'title')
# Replace the URL-derived id with the product id found in the page; several
# markup variants are tried (embedded player iframe, data-product attribute,
# player element id, playerId option, videos assignment).
200 video_id
= self
._search
_regex
(
201 (r
'<iframe[^>]+\bsrc=["\'](?
:https?
:)?
//(?
:api\
.play
-backend\
.iprima\
.cz
/prehravac
/embedded|prima\
.iprima\
.cz
/[^
/]+/[^
/]+)\?.*?
\bid
=(p\d
+)',
202 r'data
-product
="([^"]+)">',
203 r'id=["\']player
-(p\d
+)"',
204 r'playerId\s*:\s*["\']player
-(p\d
+)',
205 r'\bvideos\s
*=\s
*["\'](p\d+)'),
# Download the player init page for that product; the query carries the
# current time rounded to whole seconds and the original URL is sent as
# the Referer header.
208 playerpage = self._download_webpage(
209 'http://play.iprima.cz/prehravac/init',
210 video_id, note='Downloading player', query={
212 '_ts': round(time.time()),
213 'productId': video_id,
214 }, headers={'Referer': url})
# Local helper: turn one manifest URL into formats (HLS or DASH, chosen by
# the given key or by the URL extension) and append them to `formats`.
# NOTE(review): the initialisation of `formats` and the body of the
# `if not f.get('language')` branch are not in the listing.
218 def extract_formats(format_url, format_key=None, lang=None):
219 ext = determine_ext(format_url)
221 if format_key == 'hls' or ext == 'm3u8':
222 new_formats = self._extract_m3u8_formats(
223 format_url, video_id, 'mp4', entry_protocol='m3u8_native',
224 m3u8_id='hls', fatal=False)
225 elif format_key == 'dash' or ext == 'mpd':
227 new_formats = self._extract_mpd_formats(
228 format_url, video_id, mpd_id='dash', fatal=False)
230 for f in new_formats:
231 if not f.get('language'):
233 formats.extend(new_formats)
# Parse the player options object embedded in the player page (default '{}'
# when the pattern is not found).
# NOTE(review): with fatal=False the parse presumably may yield None, in which
# case `options.get` below would fail - confirm.
235 options = self._parse_json(
237 r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
238 playerpage, 'player options', default='{}'),
239 video_id, transform_source=js_to_json, fatal=False)
# Walk the per-key track lists; the lower-cased key is passed to the helper
# as the format key.
241 for key, tracks in options.get('tracks', {}).items():
242 if not isinstance(tracks, list):
245 src = track.get('src')
247 extract_formats(src, key.lower(), track.get('lang'))
# Scan the raw player page for quoted `src: "..."` values (the loop body is
# not in the listing).
250 for _, src in re.findall(r'src["\']\s
*:\s
*(["\'])(.+?)\1', playerpage):
# No formats plus the geo marker in the page means the content is limited
# to 'CZ'.
253 if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
254 self.raise_geo_restricted(countries=['CZ'], metadata_available=True)
256 self._sort_formats(formats)
# Fragments of the result mapping (its delimiters and other keys are missing).
261 'thumbnail': self._og_search_thumbnail(webpage, default=None),
263 'description': self._og_search_description(webpage, default=None),