]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/hidive.py
4 from .common
import InfoExtractor
14 class HiDiveIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
16 # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
17 # so disabling geo bypass completely
19 _NETRC_MACHINE
= 'hidive'
20 _LOGIN_URL
= 'https://www.hidive.com/account/login'
23 'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
25 'id': 'the-comic-artist-and-his-assistants/s01e001',
27 'title': 'the-comic-artist-and-his-assistants/s01e001',
28 'series': 'the-comic-artist-and-his-assistants',
33 'skip_download': True,
35 'skip': 'Requires Authentication',
38 def _real_initialize(self
):
39 email
, password
= self
._get
_login
_info
()
43 webpage
= self
._download
_webpage
(self
._LOGIN
_URL
, None)
44 form
= self
._search
_regex
(
45 r
'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
46 webpage
, 'login form')
47 data
= self
._hidden
_inputs
(form
)
52 self
._download
_webpage
(
53 self
._LOGIN
_URL
, None, 'Logging in', data
=urlencode_postdata(data
))
55 def _call_api(self
, video_id
, title
, key
, data
={}, **kwargs
):
60 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
62 return self
._download
_json
(
63 'https://www.hidive.com/play/settings', video_id
,
64 data
=urlencode_postdata(data
), **kwargs
) or {}
66 def _extract_subtitles_from_rendition(self
, rendition
, subtitles
, parsed_urls
):
67 for cc_file
in rendition
.get('ccFiles', []):
68 cc_url
= url_or_none(try_get(cc_file
, lambda x
: x
[2]))
69 # name is used since we cant distinguish subs with same language code
70 cc_lang
= try_get(cc_file
, (lambda x
: x
[1].replace(' ', '-').lower(), lambda x
: x
[0]), str)
71 if cc_url
not in parsed_urls
and cc_lang
:
72 parsed_urls
.add(cc_url
)
73 subtitles
.setdefault(cc_lang
, []).append({'url': cc_url}
)
75 def _get_subtitles(self
, url
, video_id
, title
, key
, parsed_urls
):
76 webpage
= self
._download
_webpage
(url
, video_id
, fatal
=False) or ''
78 for caption
in set(re
.findall(r
'data-captions=\"([^\"]+)\"', webpage
)):
79 renditions
= self
._call
_api
(
80 video_id
, title
, key
, {'Captions': caption}
, fatal
=False,
81 note
=f
'Downloading {caption} subtitle information').get('renditions') or {}
82 for rendition_id
, rendition
in renditions
.items():
83 self
._extract
_subtitles
_from
_rendition
(rendition
, subtitles
, parsed_urls
)
86 def _real_extract(self
, url
):
87 video_id
, title
, key
= self
._match
_valid
_url
(url
).group('id', 'title', 'key')
88 settings
= self
._call
_api
(video_id
, title
, key
)
90 restriction
= settings
.get('restrictionReason')
91 if restriction
== 'RegionRestricted':
92 self
.raise_geo_restricted()
93 if restriction
and restriction
!= 'None':
95 '%s said: %s' % (self
.IE_NAME
, restriction
), expected
=True)
97 formats
, parsed_urls
= [], {None}
98 for rendition_id
, rendition
in settings
['renditions'].items():
99 audio
, version
, extra
= rendition_id
.split('_')
100 m3u8_url
= url_or_none(try_get(rendition
, lambda x
: x
['bitrates']['hls']))
101 if m3u8_url
not in parsed_urls
:
102 parsed_urls
.add(m3u8_url
)
103 frmt
= self
._extract
_m
3u8_formats
(
104 m3u8_url
, video_id
, 'mp4', entry_protocol
='m3u8_native', m3u8_id
=rendition_id
, fatal
=False)
106 f
['language'] = audio
107 f
['format_note'] = f
'{version}, {extra}'
109 self
._sort
_formats
(formats
)
114 'subtitles': self
.extract_subtitles(url
, video_id
, title
, key
, parsed_urls
),
117 'season_number': int_or_none(
118 self
._search
_regex
(r
's(\d+)', key
, 'season number', default
=None)),
119 'episode_number': int_or_none(
120 self
._search
_regex
(r
'e(\d+)', key
, 'episode number', default
=None)),
121 'http_headers': {'Referer': url}