]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/hidive.py
18ae4d37923408095ed36360704ae64db9912753
4 from .common
import InfoExtractor
14 class HiDiveIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
16 # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
17 # so disabling geo bypass completely
19 _NETRC_MACHINE
= 'hidive'
20 _LOGIN_URL
= 'https://www.hidive.com/account/login'
23 'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
25 'id': 'the-comic-artist-and-his-assistants/s01e001',
27 'title': 'the-comic-artist-and-his-assistants/s01e001',
28 'series': 'the-comic-artist-and-his-assistants',
33 'skip_download': True,
35 'skip': 'Requires Authentication',
38 def _real_initialize(self
):
39 email
, password
= self
._get
_login
_info
()
43 webpage
= self
._download
_webpage
(self
._LOGIN
_URL
, None)
44 form
= self
._search
_regex
(
45 r
'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
46 webpage
, 'login form')
47 data
= self
._hidden
_inputs
(form
)
52 self
._download
_webpage
(
53 self
._LOGIN
_URL
, None, 'Logging in', data
=urlencode_postdata(data
))
55 def _call_api(self
, video_id
, title
, key
, data
={}, **kwargs
):
60 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
62 return self
._download
_json
(
63 'https://www.hidive.com/play/settings', video_id
,
64 data
=urlencode_postdata(data
), **kwargs
) or {}
66 def _extract_subtitles_from_rendition(self
, rendition
, subtitles
, parsed_urls
):
67 for cc_file
in rendition
.get('ccFiles', []):
68 cc_url
= url_or_none(try_get(cc_file
, lambda x
: x
[2]))
69 # name is used since we cant distinguish subs with same language code
70 cc_lang
= try_get(cc_file
, (lambda x
: x
[1].replace(' ', '-').lower(), lambda x
: x
[0]), str)
71 if cc_url
not in parsed_urls
and cc_lang
:
72 parsed_urls
.add(cc_url
)
73 subtitles
.setdefault(cc_lang
, []).append({'url': cc_url}
)
def _get_subtitles(self, url, video_id, title, key, subtitles, parsed_urls):
    """Add the subtitles advertised on the episode page to ``subtitles``.

    The page at ``url`` is scanned for ``data-captions`` attributes. For
    every distinct value, the settings endpoint is queried through
    ``_call_api`` with ``{'Captions': <value>}``, and the caption files of
    each returned rendition are merged via
    ``_extract_subtitles_from_rendition``.

    The page download and the API calls are made with ``fatal=False``; a
    failed page download is treated as an empty page, so no captions are
    looked up in that case.

    Args:
        url: Episode page URL.
        video_id: Identifier passed to the download helpers.
        title: Forwarded to ``_call_api``.
        key: Forwarded to ``_call_api``.
        subtitles: Dict of subtitles collected so far. Updated in place.
        parsed_urls: Set of already handled URLs. Updated in place.

    Returns:
        The ``subtitles`` dict that was passed in, with any newly found
        tracks added.
    """
    webpage = self._download_webpage(url, video_id, fatal=False) or ''
    # set(): the same caption value can appear several times in the page.
    for caption in set(re.findall(r'data-captions=\"([^\"]+)\"', webpage)):
        renditions = self._call_api(
            video_id, title, key, {'Captions': caption}, fatal=False,
            note=f'Downloading {caption} subtitle information').get('renditions') or {}
        # Only the rendition payloads are needed here, not their ids.
        for rendition in renditions.values():
            self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls)
    # Return the accumulated dict explicitly: the caller uses this method's
    # result as the final 'subtitles' value, so falling off the end would
    # discard everything collected above.
    return subtitles
85 def _real_extract(self
, url
):
86 video_id
, title
, key
= self
._match
_valid
_url
(url
).group('id', 'title', 'key')
87 settings
= self
._call
_api
(video_id
, title
, key
)
89 restriction
= settings
.get('restrictionReason')
90 if restriction
== 'RegionRestricted':
91 self
.raise_geo_restricted()
92 if restriction
and restriction
!= 'None':
94 '%s said: %s' % (self
.IE_NAME
, restriction
), expected
=True)
96 formats
, subtitles
, parsed_urls
= [], {}, {None}
97 for rendition_id
, rendition
in settings
['renditions'].items():
98 audio
, version
, extra
= rendition_id
.split('_')
99 m3u8_url
= url_or_none(try_get(rendition
, lambda x
: x
['bitrates']['hls']))
100 if m3u8_url
not in parsed_urls
:
101 parsed_urls
.add(m3u8_url
)
102 frmt
= self
._extract
_m
3u8_formats
(
103 m3u8_url
, video_id
, 'mp4', entry_protocol
='m3u8_native', m3u8_id
=rendition_id
, fatal
=False)
105 f
['language'] = audio
106 f
['format_note'] = f
'{version}, {extra}'
109 self
._extract
_subtitles
_from
_rendition
(rendition
, subtitles
, parsed_urls
)
110 self
._sort
_formats
(formats
)
115 'subtitles': self
.extract_subtitles(url
, video_id
, title
, key
, subtitles
, parsed_urls
),
118 'season_number': int_or_none(
119 self
._search
_regex
(r
's(\d+)', key
, 'season number', default
=None)),
120 'episode_number': int_or_none(
121 self
._search
_regex
(r
'e(\d+)', key
, 'episode number', default
=None)),
122 'http_headers': {'Referer': url}