]>
Commit | Line | Data |
---|---|---|
62f49dd3 | 1 | # coding: utf-8 |
f2cad2e4 | 2 | import re |
62f49dd3 | 3 | |
62f49dd3 | 4 | from .common import InfoExtractor |
62f49dd3 S |
5 | from ..utils import ( |
6 | ExtractorError, | |
7 | int_or_none, | |
a8cb7eca | 8 | try_get, |
3052a30d | 9 | url_or_none, |
62f49dd3 S |
10 | urlencode_postdata, |
11 | ) | |
12 | ||
13 | ||
class HiDiveIE(InfoExtractor):
    """Extractor for hidive.com stream pages (``/stream/<title>/<key>``)."""

    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))'
    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
    # so disabling geo bypass completely
    _GEO_BYPASS = False
    _NETRC_MACHINE = 'hidive'
    _LOGIN_URL = 'https://www.hidive.com/account/login'

    _TESTS = [{
        'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
        'info_dict': {
            'id': 'the-comic-artist-and-his-assistants/s01e001',
            'ext': 'mp4',
            'title': 'the-comic-artist-and-his-assistants/s01e001',
            'series': 'the-comic-artist-and-his-assistants',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires Authentication',
    }]

    def _real_initialize(self):
        """Log in with the configured credentials, if any were supplied.

        Returns immediately when ``_get_login_info()`` yields no email.
        Otherwise the login page is fetched, the hidden inputs of its
        ``/account/login`` form are collected, the credentials are added and
        the result is posted back to ``_LOGIN_URL``.

        NOTE(review): the response of the login POST is not inspected, so a
        rejected login is not reported here.
        """
        email, password = self._get_login_info()
        if email is None:
            return

        webpage = self._download_webpage(self._LOGIN_URL, None)
        form = self._search_regex(
            r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
            webpage, 'login form')
        data = self._hidden_inputs(form)
        data.update({
            'Email': email,
            'Password': password,
        })
        self._download_webpage(
            self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))

    def _call_api(self, video_id, title, key, data=None, **kwargs):
        """POST to the ``play/settings`` endpoint and return the parsed JSON.

        :param video_id: identifier forwarded to ``_download_json``.
        :param title: value sent as the ``Title`` form field.
        :param key: value sent as the ``Key`` form field.
        :param data: optional mapping of extra form fields. ``Title``,
            ``Key`` and ``PlayerId`` always override same-named entries.
        :param kwargs: forwarded unchanged to ``_download_json``
            (e.g. ``fatal``, ``note``).
        :return: the decoded response, or ``{}`` when ``_download_json``
            returns a falsy value.
        """
        # ``data`` defaults to None rather than a shared mutable dict.
        payload = {
            **(data or {}),
            'Title': title,
            'Key': key,
            'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783',
        }
        return self._download_json(
            'https://www.hidive.com/play/settings', video_id,
            data=urlencode_postdata(payload), **kwargs) or {}

    def _extract_subtitles_from_rendition(self, rendition, subtitles, parsed_urls):
        """Add the caption files of one rendition to ``subtitles`` in place.

        :param rendition: mapping that may carry a ``ccFiles`` list; each
            entry is read by index (``[0]``, ``[1]``, ``[2]``).
        :param subtitles: dict of ``lang -> [{'url': ...}]``, mutated.
        :param parsed_urls: set of URLs already handled, mutated. Entries
            whose URL is already in the set are skipped; the caller in this
            class seeds it with ``None`` so invalid URLs are skipped too.
        """
        # ``or []`` also covers an explicit null ``ccFiles`` value.
        for cc_file in rendition.get('ccFiles') or []:
            cc_url = url_or_none(try_get(cc_file, lambda x: x[2]))
            # name is used since we cant distinguish subs with same language code
            cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str)
            if cc_url not in parsed_urls and cc_lang:
                parsed_urls.add(cc_url)
                subtitles.setdefault(cc_lang, []).append({'url': cc_url})

    def _get_subtitles(self, url, video_id, title, key, parsed_urls):
        """Collect subtitles for every caption set advertised on the page.

        The page at ``url`` is downloaded non-fatally and scanned for
        ``data-captions="..."`` attributes. For each distinct value the
        settings API is queried with ``Captions`` set to that value, and the
        caption files of every returned rendition are gathered.

        :return: dict of ``lang -> [{'url': ...}]`` (possibly empty).
        """
        webpage = self._download_webpage(url, video_id, fatal=False) or ''
        subtitles = {}
        for caption in set(re.findall(r'data-captions=\"([^\"]+)\"', webpage)):
            renditions = self._call_api(
                video_id, title, key, {'Captions': caption}, fatal=False,
                note=f'Downloading {caption} subtitle information').get('renditions') or {}
            for rendition in renditions.values():
                self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls)
        return subtitles

    def _real_extract(self, url):
        """Build the info dict for a stream URL.

        :raises ExtractorError: when the API reports a ``restrictionReason``
            other than ``'None'``. ``'RegionRestricted'`` is routed through
            ``raise_geo_restricted()`` first.
        """
        video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key')
        settings = self._call_api(video_id, title, key)

        restriction = settings.get('restrictionReason')
        if restriction == 'RegionRestricted':
            self.raise_geo_restricted()
        if restriction and restriction != 'None':
            raise ExtractorError(
                f'{self.IE_NAME} said: {restriction}', expected=True)

        # ``None`` is pre-seeded so renditions without a valid HLS URL are skipped.
        formats, parsed_urls = [], {None}
        # A response without renditions yields no formats instead of a KeyError.
        renditions = settings.get('renditions') or {}
        for rendition_id, rendition in renditions.items():
            m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls']))
            if m3u8_url in parsed_urls:
                continue
            parsed_urls.add(m3u8_url)
            frmt = self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False)
            # Rendition IDs are split into three '_'-separated parts:
            # (audio, version, extra). Only annotate the formats when the ID
            # really has that shape, rather than failing on unexpected IDs.
            id_parts = rendition_id.split('_')
            if len(id_parts) == 3:
                audio, version, extra = id_parts
                for f in frmt:
                    f['language'] = audio
                    f['format_note'] = f'{version}, {extra}'
            formats.extend(frmt)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_id,
            'subtitles': self.extract_subtitles(url, video_id, title, key, parsed_urls),
            'formats': formats,
            'series': title,
            'season_number': int_or_none(
                self._search_regex(r's(\d+)', key, 'season number', default=None)),
            'episode_number': int_or_none(
                self._search_regex(r'e(\d+)', key, 'episode number', default=None)),
            'http_headers': {'Referer': url},
        }