]> jfr.im git - yt-dlp.git/blame - youtube_dlc/extractor/videa.py
[skip travis] renaming
[yt-dlp.git] / youtube_dlc / extractor / videa.py
CommitLineData
e7460215
B
1# coding: utf-8
2from __future__ import unicode_literals
3
e186a9ec 4import re
34675f9d
AH
5import random
6import string
7import struct
e186a9ec 8
e7460215
B
9from .common import InfoExtractor
10from ..utils import (
34675f9d 11 ExtractorError,
e7460215 12 int_or_none,
69677f3e
S
13 mimetype2ext,
14 parse_codecs,
e7460215
B
15 xpath_element,
16 xpath_text,
e7460215 17)
34675f9d
AH
18from ..compat import (
19 compat_b64decode,
20 compat_ord,
21 compat_parse_qs,
22)
e7460215
B
23
24
class VideaIE(InfoExtractor):
    """Information extractor for videa.hu / videakid.hu video pages and players."""

    _VALID_URL = r'''(?x)
                    https?://
                        videa(?:kid)?\.hu/
                        (?:
                            videok/(?:[^/]+/)*[^?#&]+-|
                            player\?.*?\bv=|
                            player/v/
                        )
                        (?P<id>[^?#&]+)
                    '''
    _TESTS = [{
        'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
        'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
        'info_dict': {
            'id': '8YfIAjxwWGwT8HVQ',
            'ext': 'mp4',
            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
            'thumbnail': r're:^https?://.*',
            'duration': 21,
        },
    }, {
        'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
        'only_matching': True,
    }, {
        'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
        'only_matching': True,
    }, {
        'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
        'only_matching': True,
    }, {
        'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
        'only_matching': True,
    }, {
        'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
        'only_matching': True,
    }, {
        'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the player URLs of all videa iframes embedded in *webpage*.

        Both videa.hu and videakid.hu hosts are recognised, mirroring the
        hosts accepted by _VALID_URL.
        """
        return [url for _, url in re.findall(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa(?:kid)?\.hu/player\?.*?\bv=.+?)\1',
            webpage)]

    def rc4(self, ciphertext, key):
        """Apply the RC4 stream cipher to *ciphertext* using *key*.

        ciphertext -- byte string to transform
        key        -- non-empty text string; each character's ordinal is fed
                      into the key schedule

        Returns the transformed data as a byte string.
        """
        key_len = len(key)
        state = list(range(256))

        # Key-scheduling: permute the state table according to the key.
        j = 0
        for i in range(256):
            j = (j + state[i] + ord(key[i % key_len])) % 256
            state[i], state[j] = state[j], state[i]

        # Keystream generation, XORed byte by byte with the input.
        # A bytearray is used so that the output is built in linear time.
        res = bytearray()
        i = j = 0
        for char in ciphertext:
            i = (i + 1) % 256
            j = (j + state[i]) % 256
            state[i], state[j] = state[j], state[i]
            k = state[(state[i] + state[j]) % 256]
            # compat_ord copes with both Python 2 (str items) and 3 (int items)
            res.append(k ^ compat_ord(char))

        return bytes(res)

    def _real_extract(self, url):
        """Extract metadata and formats for the video addressed by *url*.

        Raises ExtractorError when the page reports an error or when the
        response needed to build the decryption key is incomplete.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, fatal=True)
        error = self._search_regex(
            r'<p class="error-text">([^<]+)</p>', webpage, 'error',
            default=None)
        if error:
            raise ExtractorError(error, expected=True)

        video_src_params_raw = self._search_regex(
            r'<iframe[^>]+id="videa_player_iframe"[^>]+src="/player\?([^"]+)"',
            webpage, 'video_src_params')
        video_src_params = compat_parse_qs(video_src_params_raw)
        player_page = self._download_webpage(
            "https://videa.hu/videojs_player?%s" % video_src_params_raw,
            video_id, fatal=True)
        nonce = self._search_regex(
            r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')

        seed_alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
        random_seed = ''.join(random.choice(seed_alphabet) for _ in range(8))

        # Unscramble the nonce: every character of its first 32 characters
        # selects, through its position in the static secret, one character
        # of the remainder.
        static_secret = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
        nonce_head = nonce[:32]
        nonce_tail = nonce[32:]
        result = ''.join(
            nonce_tail[i - (static_secret.index(nonce_head[i]) - 31)]
            for i in range(32))

        # The seed and the first half of the unscrambled nonce are sent with
        # the request; the second half plus the seed start the RC4 key.
        video_src_params['_s'] = random_seed
        video_src_params['_t'] = result[:16]
        encryption_key_stem = result[16:] + random_seed

        b64_info, handle = self._download_webpage_handle(
            'http://videa.hu/videaplayer_get_xml.php', video_id,
            query=video_src_params, fatal=True)

        encrypted_info = compat_b64decode(b64_info)
        # The last part of the key comes from a response header; use .get()
        # so that a missing header is reported uniformly instead of raising
        # KeyError or TypeError depending on the Python version.
        key_suffix = handle.info().get('x-videa-xs')
        if not key_suffix:
            raise ExtractorError(
                'Unable to build decryption key: x-videa-xs header is missing')
        key = encryption_key_stem + key_suffix
        info_str = self.rc4(encrypted_info, key).decode('utf8')
        info = self._parse_xml(info_str, video_id)

        video = xpath_element(info, './/video', 'video', fatal=True)
        sources = xpath_element(
            info, './/video_sources', 'sources', fatal=True)
        hash_values = xpath_element(
            info, './/hash_values', 'hash_values', fatal=True)

        title = xpath_text(video, './title', fatal=True)

        formats = []
        for source in sources.findall('./video_source'):
            source_url = source.text
            if not source_url:
                continue
            format_name = source.get('name')
            # A source without a matching hash_value_<name> element used to
            # crash the whole extraction; keep the bare URL in that case.
            # NOTE(review): an unsigned URL may be rejected by the server.
            hash_value = xpath_text(
                hash_values, 'hash_value_%s' % format_name)
            if hash_value:
                source_url += '?md5=%s&expires=%s' % (
                    hash_value, source.get('exp'))
            f = parse_codecs(source.get('codecs'))
            f.update({
                'url': source_url,
                'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
                'format_id': format_name,
                'width': int_or_none(source.get('width')),
                'height': int_or_none(source.get('height')),
            })
            formats.append(f)
        self._sort_formats(formats)

        thumbnail = xpath_text(video, './poster_src')
        duration = int_or_none(xpath_text(video, './duration'))

        # Leave age_limit unset when the flag is absent from the XML.
        age_limit = None
        is_adult = xpath_text(video, './is_adult_content', default=None)
        if is_adult:
            age_limit = 18 if is_adult == '1' else 0

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }