]>
Commit | Line | Data |
---|---|---|
c3f3b29b NJ |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
c3f3b29b | 4 | from .common import InfoExtractor |
182b6ae8 F |
5 | from ..utils import js_to_json |
6 | import re | |
7 | import json | |
8 | import urllib.parse | |
9 | import base64 | |
c3f3b29b NJ |
10 | |
11 | ||
class RTPIE(InfoExtractor):
    """Extractor for programme/episode pages under ``rtp.pt/play/p<program_id>/...``."""

    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?'
    _TESTS = [{
        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
        'md5': 'e736ce0c665e459ddb818546220b4ef8',
        'info_dict': {
            'id': 'e174042',
            'ext': 'mp3',
            'title': 'Paixões Cruzadas',
            'description': 'As paixões musicais de António Cartaxo e António Macedo',
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
        'only_matching': True,
    }]

    # Matches a JavaScript expression of the form
    #   atob(decodeURIComponent([<string chunks>].join('')))
    # and captures the array literal (group 1) holding the chunks.
    _RX_OBFUSCATION = re.compile(r'''(?xs)
        atob\s*\(\s*decodeURIComponent\s*\(\s*
            (\[[0-9A-Za-z%,'"]*\])
        \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
    ''')

    def __unobfuscate(self, data, *, video_id):
        """Turn a JavaScript value taken from the page into JSON text.

        When ``data`` is an object literal (starts with ``{``), every
        expression matched by ``_RX_OBFUSCATION`` is replaced by a JSON string
        literal holding its decoded value: the array chunks are joined,
        percent-decoded and base64-decoded.  The result is then passed through
        ``js_to_json``.

        :param data: JavaScript source of a string or object literal.
        :param video_id: identifier forwarded to ``_parse_json``.
        :return: the value returned by ``js_to_json``.
        """
        def decode_match(m):
            # The pattern admits both quote styles inside the array literal,
            # so normalise it with js_to_json before parsing; strict JSON
            # parsing alone rejects single-quoted strings.
            chunks = self._parse_json(
                m.group(1), video_id, transform_source=js_to_json)
            raw = base64.b64decode(urllib.parse.unquote(''.join(chunks)))
            # Decode bytes one-to-one into code points, mirroring the
            # "binary string" that JavaScript's atob() produces.
            return json.dumps(raw.decode('iso-8859-1'))

        if data.startswith('{'):
            data = self._RX_OBFUSCATION.sub(decode_match, data)
        return js_to_json(data)

    def _real_extract(self, url):
        """Download the page at ``url`` and build the info dict.

        Reads the ``var f = ...`` assignment and the ``RTPPlayer`` constructor
        argument from the page's inline script and derives formats, subtitles
        and the thumbnail from them; title and description come from the
        page's meta tags.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_meta(
            'twitter:title', webpage, display_name='title', fatal=True)

        # `f` is either a quoted URL or an object literal; `config` is the
        # object passed to `new RTPPlayer(...)`.  The negative lookaheads on
        # `*/` are there to avoid picking a player setup that sits inside a
        # block comment.
        f, config = self._search_regex(
            r'''(?sx)
                var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*
                var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
            ''', webpage,
            'player config', group=('f', 'config'))

        def unobfuscate(data):
            return self.__unobfuscate(data, video_id=video_id)

        f = self._parse_json(f, video_id, unobfuscate)
        config = self._parse_json(config, video_id, unobfuscate)

        formats = []
        if isinstance(f, dict):
            # Object form: separate manifest URLs per streaming protocol.
            # Empty values are skipped rather than passed on as a URL.
            f_hls = f.get('hls')
            if f_hls:
                formats.extend(self._extract_m3u8_formats(
                    f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))

            f_dash = f.get('dash')
            if f_dash:
                formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash'))
        else:
            # Plain string form: `f` itself is the media URL.
            formats.append({
                'format_id': 'f',
                'url': f,
                'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
            })

        subtitles = {}
        # Each `vtt` entry is unpacked as (language code, display name, URL).
        # The loop variable is deliberately not called `url`, so the method's
        # `url` parameter is not shadowed.
        for lcode, lname, sub_url in config.get('vtt') or ():
            subtitles.setdefault(lcode, []).append({
                'name': lname,
                'url': sub_url,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._html_search_meta(['description', 'twitter:description'], webpage),
            'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
            'subtitles': subtitles,
        }