]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/peloton.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / peloton.py
CommitLineData
356ac009
I
1import json
2import re
ac668111 3import urllib.parse
356ac009
I
4
5from .common import InfoExtractor
3d2623a8 6from ..networking.exceptions import HTTPError
356ac009
I
7from ..utils import (
8 ExtractorError,
9 float_or_none,
10 str_or_none,
11 traverse_obj,
12 url_or_none,
13)
14
15
class PelotonIE(InfoExtractor):
    """Extractor for Peloton class player pages (``/classes/player/<id>``).

    Starts an API client session (logging in with the configured credentials
    when the API answers 401), downloads the ride details, requests a stream
    token and builds either a single audio format or HLS formats/subtitles.
    """
    IE_NAME = 'peloton'
    _NETRC_MACHINE = 'peloton'
    _VALID_URL = r'https?://members\.onepeloton\.com/classes/player/(?P<id>[a-f0-9]+)'
    _TESTS = [{
        'url': 'https://members.onepeloton.com/classes/player/0e9653eb53544eeb881298c8d7a87b86',
        'info_dict': {
            'id': '0e9653eb53544eeb881298c8d7a87b86',
            'title': '20 min Chest & Back Strength',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.+\.jpg',
            'description': 'md5:fcd5be9b9eda0194b470e13219050a66',
            'creator': 'Chase Tucker',
            'release_timestamp': 1556141400,
            'timestamp': 1556141400,
            'upload_date': '20190424',
            'duration': 1389,
            'categories': ['Strength'],
            'tags': ['Workout Mat', 'Light Weights', 'Medium Weights'],
            'is_live': False,
            'chapters': 'count:1',
            'subtitles': {'en': [{
                'url': r're:^https?://.+',
                'ext': 'vtt'
            }]},
        }, 'params': {
            'skip_download': 'm3u8',
        },
        '_skip': 'Account needed'
    }, {
        'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
        'info_dict': {
            'id': '26603d53d6bb4de1b340514864a6a6a8',
            'title': '30 min Earth Day Run',
            'ext': 'm4a',
            'thumbnail': r're:https://.+\.jpg',
            'description': 'md5:adc065a073934d7ee0475d217afe0c3d',
            'creator': 'Selena Samuela',
            'release_timestamp': 1587567600,
            'timestamp': 1587567600,
            'upload_date': '20200422',
            'duration': 1802,
            'categories': ['Running'],
            'is_live': False,
            'chapters': 'count:3'
        }, 'params': {
            'skip_download': 'm3u8',
        },
        '_skip': 'Account needed'
    }]

    # Filled with (stream URL, URL-quoted token)
    _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'

    def _start_session(self, video_id):
        """Request the ``started_client_session`` API endpoint; the response body is discarded."""
        self._download_webpage('https://api.onepeloton.com/api/started_client_session', video_id, note='Starting session')

    def _login(self, video_id):
        """Log in with the configured username/password.

        Calls ``raise_login_required()`` when no credentials are available.
        On an HTTP 401 response, re-raises the API's ``message`` as an
        ExtractorError (marked expected when it is 'Login failed').
        """
        username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()
        try:
            self._download_json(
                'https://api.onepeloton.com/auth/login', video_id, note='Logging in',
                data=json.dumps({
                    'username_or_email': username,
                    'password': password,
                    'with_pubsub': False
                }).encode(),
                headers={'Content-Type': 'application/json', 'User-Agent': 'web'})
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                json_string = self._webpage_read_content(e.cause.response, None, video_id)
                res = self._parse_json(json_string, video_id)
                raise ExtractorError(res['message'], expected=res['message'] == 'Login failed')
            else:
                raise

    def _get_token(self, video_id):
        """Return the ``token`` field of the subscription stream response.

        On an HTTP 403 response, re-raises the API's ``message`` as an
        ExtractorError (marked expected when it is 'Stream limit reached').
        """
        try:
            subscription = self._download_json(
                'https://api.onepeloton.com/api/subscription/stream', video_id, note='Downloading token',
                data=json.dumps({}).encode(), headers={'Content-Type': 'application/json'})
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                json_string = self._webpage_read_content(e.cause.response, None, video_id)
                res = self._parse_json(json_string, video_id)
                raise ExtractorError(res['message'], expected=res['message'] == 'Stream limit reached')
            else:
                raise
        return subscription['token']

    def _real_extract(self, url):
        """Build the info dict for the ride whose ID is matched from ``url``.

        Raises ExtractorError when the ride metadata or every usable stream
        URL is missing.
        """
        video_id = self._match_id(url)
        try:
            self._start_session(video_id)
        except ExtractorError as e:
            # A 401 here means there is no valid session yet: log in and retry once
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                self._login(video_id)
                self._start_session(video_id)
            else:
                raise

        metadata = self._download_json('https://api.onepeloton.com/api/ride/%s/details?stream_source=multichannel' % video_id, video_id)
        ride_data = metadata.get('ride')
        if not ride_data:
            raise ExtractorError('Missing stream metadata')
        token = self._get_token(video_id)

        is_live = False
        if ride_data.get('content_format') == 'audio':
            audio_stream_url = ride_data.get('vod_stream_url')
            if not audio_stream_url:
                # Without this check the format URL would be the literal string 'None?hdnea=...'
                raise ExtractorError('Missing audio URL')
            formats = [{
                'url': self._MANIFEST_URL_TEMPLATE % (audio_stream_url, urllib.parse.quote(token)),
                'ext': 'm4a',
                'format_id': 'audio',
                'vcodec': 'none',
            }]
            subtitles = {}
        else:
            if ride_data.get('vod_stream_url'):
                # Reduce 'xx-YY' caption codes to 'xx'; a missing captions list
                # yields an empty acceptedSubtitles parameter instead of a KeyError
                accepted_subtitles = ','.join(
                    re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption)
                    for caption in ride_data.get('captions') or [])
                # The token is quoted twice because the manifest URL is itself
                # embedded in the proxy URL's query string
                manifest_url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % (
                    accepted_subtitles,
                    ride_data['vod_stream_url'],
                    urllib.parse.quote(urllib.parse.quote(token)))
            elif ride_data.get('live_stream_url'):
                manifest_url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), urllib.parse.quote(token))
                is_live = True
            else:
                raise ExtractorError('Missing video URL')
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(manifest_url, video_id, 'mp4')

        if metadata.get('instructor_cues'):
            # Expose the raw instructor cues as an extra JSON "subtitle" track
            subtitles['cues'] = [{
                'data': json.dumps(metadata.get('instructor_cues')),
                'ext': 'json'
            }]

        category = ride_data.get('fitness_discipline_display_name')

        chapters = []
        # `or []` keeps rides without a segment list from raising TypeError
        for segment in traverse_obj(metadata, ('segments', 'segment_list')) or []:
            start_time = segment.get('start_time_offset')
            length = segment.get('length')
            chapters.append({
                'start_time': start_time,
                # Only compute the end when both operands are present
                'end_time': start_time + length if start_time is not None and length is not None else None,
                'title': segment.get('name')
            })

        return {
            'id': video_id,
            'title': ride_data.get('title'),
            'formats': formats,
            'thumbnail': url_or_none(ride_data.get('image_url')),
            'description': str_or_none(ride_data.get('description')),
            'creator': traverse_obj(ride_data, ('instructor', 'name')),
            'release_timestamp': ride_data.get('original_air_time'),
            'timestamp': ride_data.get('original_air_time'),
            'subtitles': subtitles,
            'duration': float_or_none(ride_data.get('length')),
            'categories': [category] if category else None,
            'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')),
            'is_live': is_live,
            'chapters': chapters
        }
176
177
class PelotonLiveIE(InfoExtractor):
    """Extractor for Peloton live player pages (``/player/live/<id>``).

    Looks up the workout through the API and hands off to the class player
    URL of the associated ride.
    """
    IE_NAME = 'peloton:live'
    IE_DESC = 'Peloton Live'
    _VALID_URL = r'https?://members\.onepeloton\.com/player/live/(?P<id>[a-f0-9]+)'
    _TEST = {
        'url': 'https://members.onepeloton.com/player/live/eedee2d19f804a9788f53aa8bd38eb1b',
        'info_dict': {
            'id': '32edc92d28044be5bf6c7b6f1f8d1cbc',
            'title': '30 min HIIT Ride: Live from Home',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.+\.png',
            'description': 'md5:f0d7d8ed3f901b7ee3f62c1671c15817',
            'creator': 'Alex Toussaint',
            'release_timestamp': 1587736620,
            'timestamp': 1587736620,
            'upload_date': '20200424',
            'duration': 2014,
            'categories': ['Cycling'],
            'is_live': False,
            'chapters': 'count:3'
        },
        'params': {
            'skip_download': 'm3u8',
        },
        '_skip': 'Account needed'
    }

    def _real_extract(self, url):
        """Resolve the workout matched from ``url`` to its ride's class player URL.

        Raises ExtractorError when the workout has no ride ID, or (as an
        expected error) when it is a live, non-encore ride still in the
        'PRE_START' status.
        """
        workout_id = self._match_id(url)
        workout = self._download_json(f'https://api.onepeloton.com/api/peloton/{workout_id}', workout_id)

        ride_id = workout.get('ride_id')
        if not ride_id:
            raise ExtractorError('Missing video ID')

        # Only a live, non-encore ride that is still in PRE_START is refused
        awaiting_start = (
            workout.get('is_live')
            and not workout.get('is_encore')
            and workout.get('status') == 'PRE_START')
        if awaiting_start:
            raise ExtractorError('Ride has not started', expected=True)

        return self.url_result('https://members.onepeloton.com/classes/player/%s' % ride_id)