]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/peloton.py
[extractor/FranceCulture] Fix extractor (#3874)
[yt-dlp.git] / yt_dlp / extractor / peloton.py
CommitLineData
356ac009
I
1import json
2import re
3
4from .common import InfoExtractor
5from ..compat import (
6 compat_HTTPError,
7 compat_urllib_parse,
8)
9from ..utils import (
10 ExtractorError,
11 float_or_none,
12 str_or_none,
13 traverse_obj,
14 url_or_none,
15)
16
17
class PelotonIE(InfoExtractor):
    """Extractor for Peloton classes at members.onepeloton.com/classes/player/<id>.

    Credentials are looked up under the netrc machine name 'peloton'; both
    tests are marked as skipped because an account is needed.
    """

    IE_NAME = 'peloton'
    _NETRC_MACHINE = 'peloton'
    _VALID_URL = r'https?://members\.onepeloton\.com/classes/player/(?P<id>[a-f0-9]+)'
    _TESTS = [{
        'url': 'https://members.onepeloton.com/classes/player/0e9653eb53544eeb881298c8d7a87b86',
        'info_dict': {
            'id': '0e9653eb53544eeb881298c8d7a87b86',
            'title': '20 min Chest & Back Strength',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.+\.jpg',
            'description': 'md5:fcd5be9b9eda0194b470e13219050a66',
            'creator': 'Chase Tucker',
            'release_timestamp': 1556141400,
            'timestamp': 1556141400,
            'upload_date': '20190424',
            'duration': 1389,
            'categories': ['Strength'],
            'tags': ['Workout Mat', 'Light Weights', 'Medium Weights'],
            'is_live': False,
            'chapters': 'count:1',
            'subtitles': {'en': [{
                'url': r're:^https?://.+',
                'ext': 'vtt'
            }]},
        }, 'params': {
            'skip_download': 'm3u8',
        },
        '_skip': 'Account needed'
    }, {
        'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
        'info_dict': {
            'id': '26603d53d6bb4de1b340514864a6a6a8',
            'title': '30 min Earth Day Run',
            'ext': 'm4a',
            'thumbnail': r're:https://.+\.jpg',
            'description': 'md5:adc065a073934d7ee0475d217afe0c3d',
            'creator': 'Selena Samuela',
            'release_timestamp': 1587567600,
            'timestamp': 1587567600,
            'upload_date': '20200422',
            'duration': 1802,
            'categories': ['Running'],
            'is_live': False,
            'chapters': 'count:3'
        }, 'params': {
            'skip_download': 'm3u8',
        },
        '_skip': 'Account needed'
    }]

    # Filled with (stream URL, URL-quoted token) to build a manifest URL.
    _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'

    def _start_session(self, video_id):
        """Request the started_client_session endpoint; the response is discarded."""
        self._download_webpage('https://api.onepeloton.com/api/started_client_session', video_id, note='Starting session')

    def _login(self, video_id):
        """Log in with the configured username and password.

        Calls raise_login_required() when either credential is missing.
        An HTTP 401 response is converted into an ExtractorError carrying
        the API's 'message'; it is flagged as expected only when that
        message is 'Login failed'. Any other error propagates unchanged.
        """
        username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()
        try:
            self._download_json(
                'https://api.onepeloton.com/auth/login', video_id, note='Logging in',
                data=json.dumps({
                    'username_or_email': username,
                    'password': password,
                    'with_pubsub': False
                }).encode(),
                headers={'Content-Type': 'application/json', 'User-Agent': 'web'})
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                # The 401 body is JSON; surface its message to the user.
                json_string = self._webpage_read_content(e.cause, None, video_id)
                res = self._parse_json(json_string, video_id)
                raise ExtractorError(res['message'], expected=res['message'] == 'Login failed')
            else:
                raise

    def _get_token(self, video_id):
        """Return the 'token' field of the subscription/stream response.

        An HTTP 403 response is converted into an ExtractorError carrying
        the API's 'message'; it is flagged as expected only when that
        message is 'Stream limit reached'. Any other error propagates
        unchanged.
        """
        try:
            subscription = self._download_json(
                'https://api.onepeloton.com/api/subscription/stream', video_id, note='Downloading token',
                data=json.dumps({}).encode(), headers={'Content-Type': 'application/json'})
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                json_string = self._webpage_read_content(e.cause, None, video_id)
                res = self._parse_json(json_string, video_id)
                raise ExtractorError(res['message'], expected=res['message'] == 'Stream limit reached')
            else:
                raise
        return subscription['token']

    def _real_extract(self, url):
        """Build the info dict for the class identified by ``url``.

        Starts a client session (logging in and retrying once on HTTP 401),
        downloads the ride details, obtains a stream token and then builds
        either a single audio format or the HLS formats and subtitles.

        Raises ExtractorError when the ride metadata or the stream URL is
        missing.
        """
        video_id = self._match_id(url)
        try:
            self._start_session(video_id)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                self._login(video_id)
                self._start_session(video_id)
            else:
                raise

        metadata = self._download_json('https://api.onepeloton.com/api/ride/%s/details?stream_source=multichannel' % video_id, video_id)
        ride_data = metadata.get('ride')
        if not ride_data:
            raise ExtractorError('Missing stream metadata')
        token = self._get_token(video_id)

        is_live = False
        vod_stream_url = ride_data.get('vod_stream_url')
        if ride_data.get('content_format') == 'audio':
            # Fail clearly instead of emitting the bogus URL 'None?hdnea=...'.
            if not vod_stream_url:
                raise ExtractorError('Missing audio URL')
            formats = [{
                'url': self._MANIFEST_URL_TEMPLATE % (vod_stream_url, compat_urllib_parse.quote(token)),
                'ext': 'm4a',
                'format_id': 'audio',
                'vcodec': 'none',
            }]
            subtitles = {}
        else:
            if vod_stream_url:
                # Reduce locale codes such as 'en-US' to the bare language
                # ('en'); a ride without a captions list yields an empty value.
                accepted_subtitles = ','.join(
                    re.sub(r'^([a-z]+)-([A-Z]+)$', r'\1', caption)
                    for caption in ride_data.get('captions') or [])
                # The token is quoted twice here, presumably because the
                # stream URL is itself nested inside the proxy's query string.
                manifest_url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % (
                    accepted_subtitles,
                    vod_stream_url,
                    compat_urllib_parse.quote(compat_urllib_parse.quote(token)))
            elif ride_data.get('live_stream_url'):
                manifest_url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), compat_urllib_parse.quote(token))
                is_live = True
            else:
                raise ExtractorError('Missing video URL')
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest_url, video_id, 'mp4')

        # Instructor cues are attached as an inline JSON "subtitle" track.
        if metadata.get('instructor_cues'):
            subtitles['cues'] = [{
                'data': json.dumps(metadata.get('instructor_cues')),
                'ext': 'json'
            }]

        category = ride_data.get('fitness_discipline_display_name')

        # Chapters are optional metadata: a missing segment list or a segment
        # without offset/length must not abort the extraction.
        chapters = []
        for segment in traverse_obj(metadata, ('segments', 'segment_list')) or []:
            start_time = segment.get('start_time_offset')
            length = segment.get('length')
            has_bounds = start_time is not None and length is not None
            chapters.append({
                'start_time': start_time,
                'end_time': start_time + length if has_bounds else None,
                'title': segment.get('name')
            })

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': ride_data.get('title'),
            'formats': formats,
            'thumbnail': url_or_none(ride_data.get('image_url')),
            'description': str_or_none(ride_data.get('description')),
            'creator': traverse_obj(ride_data, ('instructor', 'name')),
            'release_timestamp': ride_data.get('original_air_time'),
            'timestamp': ride_data.get('original_air_time'),
            'subtitles': subtitles,
            'duration': float_or_none(ride_data.get('length')),
            'categories': [category] if category else None,
            'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')),
            'is_live': is_live,
            'chapters': chapters
        }
179
180
class PelotonLiveIE(InfoExtractor):
    """Resolve a Peloton live-player URL to the matching classes/player URL."""

    IE_NAME = 'peloton:live'
    IE_DESC = 'Peloton Live'
    _VALID_URL = r'https?://members\.onepeloton\.com/player/live/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'https://members.onepeloton.com/player/live/eedee2d19f804a9788f53aa8bd38eb1b',
        'info_dict': {
            'id': '32edc92d28044be5bf6c7b6f1f8d1cbc',
            'title': '30 min HIIT Ride: Live from Home',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.+\.png',
            'description': 'md5:f0d7d8ed3f901b7ee3f62c1671c15817',
            'creator': 'Alex Toussaint',
            'release_timestamp': 1587736620,
            'timestamp': 1587736620,
            'upload_date': '20200424',
            'duration': 2014,
            'categories': ['Cycling'],
            'is_live': False,
            'chapters': 'count:3'
        },
        'params': {
            'skip_download': 'm3u8',
        },
        '_skip': 'Account needed'
    }

    def _real_extract(self, url):
        """Look up the live workout and hand off to its ride's player URL.

        Raises ExtractorError when the response carries no ride id, or
        (flagged as expected) when the workout is a live, non-encore ride
        whose status is still 'PRE_START'.
        """
        workout_id = self._match_id(url)
        peloton = self._download_json(f'https://api.onepeloton.com/api/peloton/{workout_id}', workout_id)

        ride_id = peloton.get('ride_id')
        if not ride_id:
            raise ExtractorError('Missing video ID')

        # Only a live, non-encore ride that is still in PRE_START is refused;
        # every other combination is redirected to the class player.
        awaiting_start = (
            peloton.get('is_live')
            and not peloton.get('is_encore')
            and peloton.get('status') == 'PRE_START')
        if awaiting_start:
            raise ExtractorError('Ride has not started', expected=True)

        return self.url_result('https://members.onepeloton.com/classes/player/%s' % ride_id)
217 else:
218 raise ExtractorError('Missing video ID')