]>
Commit | Line | Data |
---|---|---|
91a6adde JMF |
1 | # encoding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | import base64 | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | struct_unpack, | |
10 | ) | |
11 | ||
12 | ||
class RTVEALaCartaIE(InfoExtractor):
    """Information extractor for videos on rtve.es "a la carta"."""

    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'http://www\.rtve\.es/alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '18fcd45965bdd076efdb12cd7f6d7b9e',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
        },
    }

    def _decrypt_url(self, png):
        """Recover the video URL hidden in a base64-encoded PNG.

        The payload is read from the first ``tEXt`` marker found in the
        decoded data.  After dropping NUL bytes, the payload has the form
        ``<alphabet data>#<url data>``:

        * the alphabet is obtained by taking one character of the alphabet
          data and then skipping 1, 2, 3, 0, 1, 2, 3, 0, ... characters;
        * every URL character is encoded as a two-digit decimal index into
          the alphabet; between the tens digit and the units digit there
          are 3, 0, 1, 2, 3, 0, 1, 2, ... filler characters.

        :param png: base64 text of the PNG file.
        :returns: the decoded URL as a string.
        :raises ValueError: if the ``tEXt`` marker or the ``#`` separator
            is missing (or if the payload is otherwise malformed).
        """
        encrypted_data = base64.b64decode(png)

        text_index = encrypted_data.find(b'tEXt')
        # The 4 bytes in front of the chunk type hold the chunk length, so a
        # marker that is absent (-1) or closer than 4 bytes to the start
        # cannot be parsed.  Without this check the negative slice below
        # would silently pick up the tail of the file instead.
        if text_index < 4:
            raise ValueError('Unable to find the tEXt chunk in the PNG data')

        text_chunk = encrypted_data[text_index - 4:]
        # Big-endian unsigned 32-bit payload length
        length = struct_unpack('!I', text_chunk[:4])[0]
        # Use bytearray to get integers when iterating in both python 2.x and 3.x
        payload = bytearray(text_chunk[8:8 + length])
        chars = [chr(b) for b in payload if b != 0]

        if '#' not in chars:
            raise ValueError(
                'Unable to find the "#" separator in the tEXt chunk')
        hash_index = chars.index('#')
        alphabet_data = chars[:hash_index]
        url_data = chars[hash_index + 1:]

        # Build the alphabet: keep a character, then skip `skip_step`
        # characters, where `skip_step` cycles through 1, 2, 3, 0.
        alphabet = []
        skip_step = 0
        skip_left = 0
        for char in alphabet_data:
            if skip_left == 0:
                alphabet.append(char)
                skip_step = (skip_step + 1) % 4
                skip_left = skip_step
            else:
                skip_left -= 1

        # Decode the URL: read the tens digit, skip `skip_left` fillers,
        # read the units digit and look the resulting index up.
        url_chars = []
        expecting_tens = True
        skip_left = 3
        decoded_count = 1
        index = 0
        for char in url_data:
            if expecting_tens:
                index = int(char) * 10
                expecting_tens = False
            elif skip_left == 0:
                index += int(char)
                url_chars.append(alphabet[index])
                skip_left = (decoded_count + 3) % 4
                expecting_tens = True
                decoded_count += 1
            else:
                skip_left -= 1

        return ''.join(url_chars)

    def _real_extract(self, url):
        """Return the info dict (id, title, url, thumbnail) for *url*."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        info = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)['page']['items'][0]
        # The video URL is not in the JSON metadata; it is hidden inside a
        # PNG file that is served base64-encoded.
        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id
        png = self._download_webpage(png_url, video_id, 'Downloading url information')
        video_url = self._decrypt_url(png)

        return {
            'id': video_id,
            'title': info['title'],
            'url': video_url,
            # The thumbnail is optional: do not fail if it is missing
            'thumbnail': info.get('image'),
        }