]>
Commit | Line | Data |
---|---|---|
e897bd82 | 1 | import base64 |
182b6ae8 | 2 | import json |
e897bd82 | 3 | import re |
182b6ae8 | 4 | import urllib.parse |
e897bd82 SS |
5 | |
6 | from .common import InfoExtractor | |
7 | from ..utils import js_to_json | |
c3f3b29b NJ |
8 | |
9 | ||
class RTPIE(InfoExtractor):
    """Extractor for rtp.pt/play programme and episode pages."""

    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
    _TESTS = [{
        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
        'md5': 'e736ce0c665e459ddb818546220b4ef8',
        'info_dict': {
            'id': 'e174042',
            'ext': 'mp3',
            'title': 'Paixões Cruzadas',
            'description': 'As paixões musicais de António Cartaxo e António Macedo',
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
        'only_matching': True,
    }]

    # Matches `atob(decodeURIComponent([...].join("")))`, capturing the
    # array literal of string fragments.
    _RX_OBFUSCATION = re.compile(r'''(?xs)
        atob\s*\(\s*decodeURIComponent\s*\(\s*
            (\[[0-9A-Za-z%,'"]*\])
        \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
    ''')

    def __unobfuscate(self, data, *, video_id):
        """Turn a (possibly obfuscated) JS value into JSON text.

        When ``data`` starts with ``{``, every expression matched by
        ``_RX_OBFUSCATION`` is replaced by a JSON string literal holding the
        decoded value: the captured array is parsed, its items are joined,
        percent-decoded, base64-decoded and read as ISO-8859-1.  The result
        (or the untouched ``data`` otherwise) is passed through ``js_to_json``.
        """
        def decode_match(match):
            fragments = self._parse_json(match.group(1), video_id)
            encoded = urllib.parse.unquote(''.join(fragments))
            plain = base64.b64decode(encoded).decode('iso-8859-1')
            return json.dumps(plain)

        if data.startswith('{'):
            data = self._RX_OBFUSCATION.sub(decode_match, data)
        return js_to_json(data)

    def _real_extract(self, url):
        """Build the info dict (formats, subtitles, metadata) for ``url``."""
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_meta(
            'twitter:title', webpage, display_name='title', fatal=True)

        # `var f = ...;` carries the media source(s); the RTPPlayer constructor
        # argument carries the player configuration.  The negative lookaheads
        # on `*/` reject a match that contains or is followed by a comment end.
        raw_f, raw_config = self._search_regex(
            r'''(?sx)
                var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
                var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
            ''', webpage,
            'player config', group=('f', 'config'))

        def to_json(source):
            return self.__unobfuscate(source, video_id=video_id)

        f = self._parse_json(raw_f, video_id, to_json)
        config = self._parse_json(raw_config, video_id, to_json)

        if isinstance(f, dict):
            # Object form: optional HLS and DASH manifest URLs.
            formats = []
            hls_url = f.get('hls')
            if hls_url is not None:
                formats += self._extract_m3u8_formats(
                    hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')

            dash_url = f.get('dash')
            if dash_url is not None:
                formats += self._extract_mpd_formats(
                    dash_url, video_id, mpd_id='dash')
        else:
            # Any non-dict value is used directly as the media URL.
            is_audio = config.get('mediaType') == 'audio'
            formats = [{
                'format_id': 'f',
                'url': f,
                'vcodec': 'none' if is_audio else None,
            }]

        subtitles = {}
        tracks = config.get('vtt')
        if tracks is not None:
            # Each entry unpacks as (language code, display name, URL).
            for lang_code, lang_name, track_url in tracks:
                subtitles.setdefault(lang_code, []).append({
                    'name': lang_name,
                    'url': track_url,
                })

        description = self._html_search_meta(
            ['description', 'twitter:description'], webpage)
        thumbnail = config.get('poster') or self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
            'subtitles': subtitles,
        }