]>
Commit | Line | Data |
---|---|---|
c3f3b29b | 1 | from .common import InfoExtractor |
182b6ae8 F |
2 | from ..utils import js_to_json |
3 | import re | |
4 | import json | |
5 | import urllib.parse | |
6 | import base64 | |
c3f3b29b NJ |
7 | |
8 | ||
class RTPIE(InfoExtractor):
    """Extractor for programme/episode pages under ``rtp.pt/play/p<program_id>/...``."""

    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
    _TESTS = [{
        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
        'md5': 'e736ce0c665e459ddb818546220b4ef8',
        'info_dict': {
            'id': 'e174042',
            'ext': 'mp3',
            'title': 'Paixões Cruzadas',
            'description': 'As paixões musicais de António Cartaxo e António Macedo',
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
        'only_matching': True,
    }]

    # Matches ``atob(decodeURIComponent([...].join("")))`` and captures the
    # array literal holding the percent-encoded, base64-encoded chunks.
    _RX_OBFUSCATION = re.compile(r'''(?xs)
        atob\s*\(\s*decodeURIComponent\s*\(\s*
            (\[[0-9A-Za-z%,'"]*\])
        \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
    ''')

    def __unobfuscate(self, data, *, video_id):
        """Convert a JavaScript literal taken from the player page to JSON text.

        When *data* is an object literal, every
        ``atob(decodeURIComponent([...].join("")))`` expression in it is
        replaced by the JSON-encoded string that expression evaluates to.
        The result is then passed through ``js_to_json``.

        *video_id* is only forwarded to ``_parse_json``.
        """
        def decode_chunks(match):
            # The pattern admits single-quoted array elements, which are not
            # valid JSON, so normalise the array literal before parsing it.
            chunks = self._parse_json(
                match.group(1), video_id, transform_source=js_to_json)
            raw = base64.b64decode(urllib.parse.unquote(''.join(chunks)))
            # iso-8859-1 maps every byte to a code point, so this cannot fail.
            return json.dumps(raw.decode('iso-8859-1'))

        if data.startswith('{'):
            data = self._RX_OBFUSCATION.sub(decode_chunks, data)
        return js_to_json(data)

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for it.

        Reads the ``var f = ...`` media source and the ``RTPPlayer`` config
        object from the page's inline script. ``f`` is either a dict with
        optional ``hls``/``dash`` manifest URLs or a plain media URL string.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_meta(
            'twitter:title', webpage, display_name='title', fatal=True)

        # The negative lookaheads on ``*/`` skip player setups that sit
        # inside a JavaScript block comment.
        f, config = self._search_regex(
            r'''(?sx)
                var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
                var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
            ''', webpage,
            'player config', group=('f', 'config'))

        def to_json(data):
            return self.__unobfuscate(data, video_id=video_id)

        f = self._parse_json(f, video_id, to_json)
        config = self._parse_json(config, video_id, to_json)

        formats = []
        if isinstance(f, dict):
            # Truthiness (not ``is not None``) so empty strings are skipped;
            # non-fatal so one broken manifest does not discard the other.
            hls_url = f.get('hls')
            if hls_url:
                formats.extend(self._extract_m3u8_formats(
                    hls_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))

            dash_url = f.get('dash')
            if dash_url:
                formats.extend(self._extract_mpd_formats(
                    dash_url, video_id, mpd_id='dash', fatal=False))
        else:
            formats.append({
                'format_id': 'f',
                'url': f,
                'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
            })

        subtitles = {}
        for entry in config.get('vtt') or []:
            # Each entry is unpacked as (language code, display name, URL);
            # skip anything that does not have that shape instead of aborting.
            if not isinstance(entry, (list, tuple)) or len(entry) < 3:
                continue
            lcode, lname, sub_url = entry[:3]
            if not sub_url:
                continue
            subtitles.setdefault(lcode, []).append({
                'name': lname,
                'url': sub_url,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._html_search_meta(['description', 'twitter:description'], webpage),
            'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
            'subtitles': subtitles,
        }