jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/playsuisse.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / playsuisse.py
CommitLineData
ee164987
SB
1import json
2
3from .common import InfoExtractor
cae6e461
CK
4from ..utils import (
5 ExtractorError,
6 int_or_none,
7 parse_qs,
8 traverse_obj,
9 update_url_query,
10 urlencode_postdata,
11)
ee164987
SB
12
13
class PlaySuisseIE(InfoExtractor):
    """Extractor for playsuisse.ch ``watch`` and ``detail`` pages.

    Extraction requires a prior password login (see ``_perform_login``): the
    resulting ID token is appended to every HLS manifest URL. An asset that
    carries ``episodes`` is returned as a playlist whose entries are the
    individual episodes; any other asset is returned as a single video.
    """

    _NETRC_MACHINE = 'playsuisse'
    _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
    _TESTS = [
        {
            # Old URL
            'url': 'https://www.playsuisse.ch/watch/763211/0',
            'only_matching': True,
        },
        {
            # episode in a series
            'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
            'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
            'info_dict': {
                'id': '763211',
                'ext': 'mp4',
                'title': 'Knochen',
                'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
                'duration': 3344,
                'series': 'Wilder',
                'season': 'Season 1',
                'season_number': 1,
                'episode': 'Knochen',
                'episode_number': 1,
                'thumbnail': 're:https://playsuisse-img.akamaized.net/',
            },
        }, {
            # film
            'url': 'https://www.playsuisse.ch/watch/808675',
            'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
            'info_dict': {
                'id': '808675',
                'ext': 'mp4',
                'title': 'Der Läufer',
                'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
                'duration': 5280,
                'thumbnail': 're:https://playsuisse-img.akamaized.net/',
            },
        }, {
            # series (treated as a playlist)
            'url': 'https://www.playsuisse.ch/detail/1115687',
            'info_dict': {
                'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
                'id': '1115687',
                'series': 'They all came out to Montreux',
                'title': 'They all came out to Montreux',
            },
            'playlist': [{
                'info_dict': {
                    'description': 'md5:f2462744834b959a31adc6292380cda2',
                    'duration': 3180,
                    'episode': 'Folge 1',
                    'episode_number': 1,
                    'id': '1112663',
                    'season': 'Season 1',
                    'season_number': 1,
                    'series': 'They all came out to Montreux',
                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
                    'title': 'Folge 1',
                    'ext': 'mp4',
                },
            }, {
                'info_dict': {
                    'description': 'md5:9dfd308699fe850d3bce12dc1bad9b27',
                    'duration': 2935,
                    'episode': 'Folge 2',
                    'episode_number': 2,
                    'id': '1112661',
                    'season': 'Season 1',
                    'season_number': 1,
                    'series': 'They all came out to Montreux',
                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
                    'title': 'Folge 2',
                    'ext': 'mp4',
                },
            }, {
                'info_dict': {
                    'description': 'md5:14a93a3356b2492a8f786ab2227ef602',
                    'duration': 2994,
                    'episode': 'Folge 3',
                    'episode_number': 3,
                    'id': '1112664',
                    'season': 'Season 1',
                    'season_number': 1,
                    'series': 'They all came out to Montreux',
                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
                    'title': 'Folge 3',
                    'ext': 'mp4',
                },
            }],
        },
    ]

    # Sent verbatim as the "query" member of the GraphQL request body.
    # The same "Asset" fragment is requested for the asset itself and for each
    # of its episodes, so both can be handled by _extract_single().
    _GRAPHQL_QUERY = '''
        query AssetWatch($assetId: ID!) {
            assetV2(id: $assetId) {
                ...Asset
                episodes {
                    ...Asset
                }
            }
        }
        fragment Asset on AssetV2 {
            id
            name
            description
            duration
            episodeNumber
            seasonNumber
            seriesName
            medias {
                type
                url
            }
            thumbnail16x9 {
                ...ImageDetails
            }
            thumbnail2x3 {
                ...ImageDetails
            }
            thumbnail16x9WithTitle {
                ...ImageDetails
            }
            thumbnail2x3WithTitle {
                ...ImageDetails
            }
        }
        fragment ImageDetails on AssetImage {
            id
            url
        }'''
    _LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
    _LOGIN_PATH = 'B2C_1A__SignInV2'
    # Set by _perform_login(); _real_extract() refuses to run while it is unset.
    _ID_TOKEN = None

    def _perform_login(self, username, password):
        """Log in with *username* / *password* and store the ID token.

        Raises ExtractorError when the sign-in endpoint reports status 400
        (treated as invalid credentials) or when no ``id_token`` can be read
        from the URL of the final response.
        """
        login_page = self._download_webpage(
            'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
            query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
        # The login page embeds a `var SETTINGS = {...}` object that carries
        # the CSRF token and transaction ID needed by the following requests.
        settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None)

        csrf_token = settings['csrf']
        query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}

        # HTTP 400 is tolerated so that the JSON body can still be parsed;
        # the outcome is then read from its `status` field.
        status = traverse_obj(self._download_json(
            f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/SelfAsserted', None, 'Logging in',
            query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=urlencode_postdata({
                'request_type': 'RESPONSE',
                'signInName': username,
                'password': password,
            }), expected_status=400), ('status', {int_or_none}))
        if status == 400:
            raise ExtractorError('Invalid username or password', expected=True)

        urlh = self._request_webpage(
            f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/api/CombinedSigninAndSignup/confirmed',
            None, 'Downloading ID token', query={
                'rememberMe': 'false',
                'csrf_token': csrf_token,
                **query,
                'diags': '',
            })

        # The token is taken from the query string of the response URL.
        self._ID_TOKEN = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
        if not self._ID_TOKEN:
            raise ExtractorError('Login failed')

    def _get_media_data(self, media_id):
        """Return the ``assetV2`` object for *media_id* from the GraphQL API.

        Raises ExtractorError when the response contains no asset object
        (for example ``data`` or ``assetV2`` being null), instead of letting
        a TypeError/KeyError escape here or later during extraction.
        """
        # NOTE In the web app, the "locale" header is used to switch between languages,
        # However this doesn't seem to take effect when passing the header here.
        response = self._download_json(
            'https://www.playsuisse.ch/api/graphql',
            media_id, data=json.dumps({
                'operationName': 'AssetWatch',
                'query': self._GRAPHQL_QUERY,
                'variables': {'assetId': media_id},
            }).encode('utf-8'),
            headers={'Content-Type': 'application/json', 'locale': 'de'})

        asset = traverse_obj(response, ('data', 'assetV2'))
        if not isinstance(asset, dict):
            raise ExtractorError(f'No asset data returned for ID {media_id}')
        return asset

    def _real_extract(self, url):
        """Extract a single video, or a playlist when the asset has episodes."""
        if not self._ID_TOKEN:
            self.raise_login_required(method='password')

        media_id = self._match_id(url)
        media_data = self._get_media_data(media_id)
        info = self._extract_single(media_data)
        if media_data.get('episodes'):
            # Reuse the asset's own metadata (title, description, ...) as the
            # playlist metadata and attach one entry per episode.
            info.update({
                '_type': 'playlist',
                'entries': map(self._extract_single, media_data['episodes']),
            })
        return info

    def _extract_single(self, media_data):
        """Build an info dict from one ``Asset`` object of the GraphQL response."""
        # Collect the value of every key starting with "thumbnail"
        # (thumbnail16x9, thumbnail2x3, ...).
        thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))

        formats, subtitles = [], {}
        for media in traverse_obj(media_data, 'medias', default=[]):
            # Only HLS entries that actually carry a URL are used.
            if not media.get('url') or media.get('type') != 'HLS':
                continue
            # The ID token obtained at login is passed as a query parameter
            # of the manifest URL.
            f, subs = self._extract_m3u8_formats_and_subtitles(
                update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
                media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
            formats.extend(f)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': media_data['id'],
            'title': media_data.get('name'),
            'description': media_data.get('description'),
            'thumbnails': thumbnails,
            'duration': int_or_none(media_data.get('duration')),
            'formats': formats,
            'subtitles': subtitles,
            'series': media_data.get('seriesName'),
            'season_number': int_or_none(media_data.get('seasonNumber')),
            # The asset name doubles as the episode title, but only when the
            # asset has an episode number.
            'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
            'episode_number': int_or_none(media_data.get('episodeNumber')),
        }