]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/caracoltv.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / caracoltv.py
1 import base64
2 import json
3 import uuid
4
5 from .common import InfoExtractor
6 from ..utils import (
7 int_or_none,
8 js_to_json,
9 traverse_obj,
10 urljoin,
11 )
12
13
class CaracolTvPlayIE(InfoExtractor):
    """Extractor for series and single videos on play.caracoltv.com."""

    _VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
    _NETRC_MACHINE = 'caracoltv-play'

    _TESTS = [{
        'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
        'info_dict': {
            'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
            'title': 'La teoría del promedio',
            'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
        'info_dict': {
            'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
            'title': 'Ella',
            'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
        'info_dict': {
            'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
            'title': 'La vuelta al mundo en 80 risas 2022',
            'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
        },
        'playlist_count': 17,
    }, {
        'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
        'only_matching': True,
    }]

    # Sent as the Bearer credential on every feed request; populated by
    # _perform_login() and stays None until a login has succeeded.
    _USER_TOKEN = None

    def _extract_app_token(self, webpage):
        """Return the base64 ``key:secret`` application token for Basic auth.

        The key and secret are read from the ``mediation.live`` entry of the
        site's coreConfig JS file when it can be located via *webpage* and
        downloaded; otherwise the hard-coded values below are used.

        *webpage* may be falsy (e.g. the result of a failed non-fatal
        download); the fallback values are used in that case.
        """
        # `or ''` keeps the regex helpers from being handed a non-string when
        # the preceding non-fatal download did not produce any page text.
        config_js_path = self._search_regex(
            r'<script[^>]+src\s*=\s*"([^"]+coreConfig\.js[^"]+)', webpage or '',
            'config js url', fatal=False)

        mediation_config = {}
        if config_js_path:
            config_js = self._download_webpage(
                urljoin('https://play.caracoltv.com/', config_js_path), None,
                fatal=False, note='Extracting JS config')
            mediation_config = self._search_json(
                r'mediation\s*:', config_js or '', 'mediation_config', None,
                transform_source=js_to_json, fatal=False)

        key = traverse_obj(
            mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
        secret = traverse_obj(
            mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'

        return base64.b64encode(f'{key}:{secret}'.encode()).decode()

    def _perform_login(self, email, password):
        """Log in with *email*/*password* and store the resulting user token.

        Steps: derive the application token from the site's front page,
        exchange it for a bearer token at ``/applications/oauth``, then post
        the credentials to ``/user/login``. The ``user_token`` of the response
        is saved in ``self._USER_TOKEN``.
        """
        # Non-fatal: _extract_app_token() falls back to built-in credentials
        # when the front page is unavailable.
        webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
        app_token = self._extract_app_token(webpage)

        bearer_token = self._download_json(
            'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
            headers={'Authorization': f'Basic {app_token}'})['token']

        login_payload = {
            'device_data': {
                # A fresh random device id is generated for every login
                'device_id': str(uuid.uuid4()),
                'device_token': '',
                'device_type': 'web',
            },
            'login_data': {
                'enabled': True,
                'email': email,
                'password': password,
            },
        }
        self._USER_TOKEN = self._download_json(
            'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {bearer_token}',
            }, data=json.dumps(login_payload).encode())['user_token']

    def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
        """Build the info dict for one feed item.

        *video_data* must contain ``stream_url`` and ``id``; all other fields
        are optional. The series/season arguments are copied into the result
        as-is (``season_number`` is coerced with ``int_or_none``).
        """
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')

        return {
            'id': video_data['id'],
            'title': video_data.get('name'),
            'description': video_data.get('description'),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': traverse_obj(
                video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
            'series_id': series_id,
            'season_id': season_id,
            'season_number': int_or_none(season_number),
            'episode_number': int_or_none(video_data.get('item_order')),
            # An entry_type of 3 is treated as a live stream
            'is_live': video_data.get('entry_type') == 3,
        }

    def _extract_series_seasons(self, seasons, series_id):
        """Yield an info dict for every episode of every season in *seasons*.

        One feed request is made per season, authorised with the stored user
        token; the season's ``order`` value is used as its season number.
        """
        for season in seasons:
            api_response = self._download_json(
                'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
                headers={'Authorization': f'Bearer {self._USER_TOKEN}'})

            season_number = season.get('order')
            for episode in api_response['items']:
                yield self._extract_video(episode, series_id, season['id'], season_number)

    def _real_extract(self, url):
        """Return a single video, or a playlist of all episodes for a series."""
        series_id = self._match_id(url)

        # No token yet means no login has happened: use the guest account.
        if self._USER_TOKEN is None:
            self._perform_login('guest@inmobly.com', 'Test@gus1')

        api_response = self._download_json(
            'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
            headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]

        # Items without seasons are handled as standalone videos
        if not api_response.get('seasons'):
            return self._extract_video(api_response)

        return self.playlist_result(
            self._extract_series_seasons(api_response['seasons'], series_id),
            series_id, **traverse_obj(api_response, {
                'title': 'name',
                'description': 'description',
            }))