]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/hidive.py
3 from .common
import InfoExtractor
13 class HiDiveIE(InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
15 # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
16 # so disabling geo bypass completely
18 _NETRC_MACHINE
= 'hidive'
19 _LOGIN_URL
= 'https://www.hidive.com/account/login'
22 'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
24 'id': 'the-comic-artist-and-his-assistants/s01e001',
26 'title': 'the-comic-artist-and-his-assistants/s01e001',
27 'series': 'the-comic-artist-and-his-assistants',
32 'skip_download': True,
34 'skip': 'Requires Authentication',
37 def _perform_login(self
, username
, password
):
38 webpage
= self
._download
_webpage
(self
._LOGIN
_URL
, None)
39 form
= self
._search
_regex
(
40 r
'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
41 webpage
, 'login form')
42 data
= self
._hidden
_inputs
(form
)
47 self
._download
_webpage
(
48 self
._LOGIN
_URL
, None, 'Logging in', data
=urlencode_postdata(data
))
50 def _call_api(self
, video_id
, title
, key
, data
={}, **kwargs
):
55 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
57 return self
._download
_json
(
58 'https://www.hidive.com/play/settings', video_id
,
59 data
=urlencode_postdata(data
), **kwargs
) or {}
def _extract_subtitles_from_rendition(self, rendition, subtitles, parsed_urls):
    """Collect the closed-caption files of one rendition into ``subtitles``.

    Every entry of ``rendition['ccFiles']`` is read positionally: element 2
    is taken as the caption URL, element 1 as a name that is lowercased and
    has its spaces replaced by hyphens to form the subtitle key, and
    element 0 as the alternative key getter.

    Args:
        rendition: mapping describing one rendition; its ``'ccFiles'`` entry
            may be absent, empty or null.
        subtitles: dict updated in place; maps a subtitle key to a list of
            ``{'url': ...}`` dicts.
        parsed_urls: set of URLs already handled; updated in place so the
            same caption file is not added twice.
    """
    # ``or []`` also covers an explicit null value, which
    # ``.get('ccFiles', [])`` would hand straight to the loop.
    for cc_file in rendition.get('ccFiles') or []:
        cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))
        # name is used since we can't distinguish subs with same language code
        cc_lang = try_get(
            cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)
        # Reject entries without a usable URL explicitly instead of relying
        # on the caller having seeded ``parsed_urls`` with None.
        if not cc_url or not cc_lang or cc_url in parsed_urls:
            continue
        parsed_urls.add(cc_url)
        subtitles.setdefault(cc_lang, []).append({'url': cc_url})
70 def _get_subtitles(self
, url
, video_id
, title
, key
, parsed_urls
):
71 webpage
= self
._download
_webpage
(url
, video_id
, fatal
=False) or ''
73 for caption
in set(re
.findall(r
'data-captions=\"([^\"]+)\"', webpage
)):
74 renditions
= self
._call
_api
(
75 video_id
, title
, key
, {'Captions': caption}
, fatal
=False,
76 note
=f
'Downloading {caption} subtitle information').get('renditions') or {}
77 for rendition_id
, rendition
in renditions
.items():
78 self
._extract
_subtitles
_from
_rendition
(rendition
, subtitles
, parsed_urls
)
81 def _real_extract(self
, url
):
82 video_id
, title
, key
= self
._match
_valid
_url
(url
).group('id', 'title', 'key')
83 settings
= self
._call
_api
(video_id
, title
, key
)
85 restriction
= settings
.get('restrictionReason')
86 if restriction
== 'RegionRestricted':
87 self
.raise_geo_restricted()
88 if restriction
and restriction
!= 'None':
90 '%s said: %s' % (self
.IE_NAME
, restriction
), expected
=True)
92 formats
, parsed_urls
= [], {None}
93 for rendition_id
, rendition
in settings
['renditions'].items():
94 audio
, version
, extra
= rendition_id
.split('_')
95 m3u8_url
= url_or_none(try_get(rendition
, lambda x
: x
['bitrates']['hls']))
96 if m3u8_url
not in parsed_urls
:
97 parsed_urls
.add(m3u8_url
)
98 frmt
= self
._extract
_m
3u8_formats
(
99 m3u8_url
, video_id
, 'mp4', entry_protocol
='m3u8_native', m3u8_id
=rendition_id
, fatal
=False)
101 f
['language'] = audio
102 f
['format_note'] = f
'{version}, {extra}'
104 self
._sort
_formats
(formats
)
109 'subtitles': self
.extract_subtitles(url
, video_id
, title
, key
, parsed_urls
),
112 'season_number': int_or_none(
113 self
._search
_regex
(r
's(\d+)', key
, 'season number', default
=None)),
114 'episode_number': int_or_none(
115 self
._search
_regex
(r
'e(\d+)', key
, 'episode number', default
=None)),
116 'http_headers': {'Referer': url}