]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/digitalconcerthall.py
[extractor] Add `_perform_login` function (#2943)
[yt-dlp.git] / yt_dlp / extractor / digitalconcerthall.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6 from ..utils import (
7 ExtractorError,
8 parse_resolution,
9 traverse_obj,
10 try_get,
11 urlencode_postdata,
12 )
13
14
class DigitalConcertHallIE(InfoExtractor):
    """Extract the works of a concert on digitalconcerthall.com as a playlist.

    A concert URL yields one playlist whose entries are the individual items
    embedded in the concert's API record. Stream manifests are requested with
    a bearer token, so ``_real_initialize`` demands a successful login.
    """

    IE_DESC = 'DigitalConcertHall extractor'
    _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/concert/(?P<id>[0-9]+)'
    _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
    # Bearer token sent with stream requests; stays None until login succeeds.
    _ACCESS_TOKEN = None
    _NETRC_MACHINE = 'digitalconcerthall'
    _TESTS = [{
        'note': 'Playlist with only one video',
        'url': 'https://www.digitalconcerthall.com/en/concert/53201',
        'info_dict': {
            'id': '53201-1',
            'ext': 'mp4',
            'composer': 'Kurt Weill',
            'title': '[Magic Night]',
            'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
            'upload_date': '20210624',
            'timestamp': 1624548600,
            'duration': 2798,
            'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'Concert with several works and an interview',
        'url': 'https://www.digitalconcerthall.com/en/concert/53785',
        'info_dict': {
            'id': '53785',
            'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
            'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
        },
        'params': {'skip_download': 'm3u8'},
        'playlist_count': 3,
    }]

    def _perform_login(self, username, password):
        """Obtain a device token and authorise it with the user's credentials.

        Two POSTs go to ``_OAUTH_URL``: a ``device`` grant that returns an
        access token, then a ``password`` grant sent with that token as the
        bearer. The token is kept on the instance only once the second
        request has succeeded.

        Raises (via ``raise_login_required``) when the password grant fails.
        """
        token_response = self._download_json(
            self._OAUTH_URL,
            None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
                'affiliate': 'none',
                'grant_type': 'device',
                'device_vendor': 'unknown',
                'app_id': 'dch.webapp',
                'app_version': '1.0.0',
                'client_secret': '2ySLN+2Fwb',
            }), headers={
                'Content-Type': 'application/x-www-form-urlencoded',
            })
        access_token = token_response['access_token']
        try:
            self._download_json(
                self._OAUTH_URL,
                None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
                    'grant_type': 'password',
                    'username': username,
                    'password': password,
                }), headers={
                    'Content-Type': 'application/x-www-form-urlencoded',
                    'Referer': 'https://www.digitalconcerthall.com',
                    'Authorization': f'Bearer {access_token}'
                })
        except ExtractorError:
            # NOTE(review): every ExtractorError (including transport
            # failures) is reported as bad credentials here.
            self.raise_login_required(msg='Login info incorrect')
        # Store the token only after the password grant went through, so a
        # failed attempt never leaves the instance looking authenticated.
        self._ACCESS_TOKEN = access_token

    def _real_initialize(self):
        """Refuse to extract unless a login has produced an access token."""
        if not self._ACCESS_TOKEN:
            self.raise_login_required(method='password')

    def _entries(self, items, language, **kwargs):
        """Yield one info dict per concert item.

        items: iterable of item records taken from the concert API response;
            each must provide ``id`` and ``_links.streams.href``.
        language: value sent as ``Accept-Language`` on the stream request.
        kwargs: extra fields merged into every yielded dict (they override
            the fields listed before them, but not ``chapters``).
        """
        for item in items:
            video_id = item['id']
            stream_info = self._download_json(
                self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
                    'Accept': 'application/json',
                    'Authorization': f'Bearer {self._ACCESS_TOKEN}',
                    'Accept-Language': language
                })

            m3u8_url = traverse_obj(
                stream_info, ('channel', lambda x: x.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
            # No matching channel means no manifest URL; skip the download
            # instead of handing None to the manifest extractor, and let the
            # empty format list be reported downstream.
            formats = []
            if m3u8_url:
                formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
            self._sort_formats(formats)

            cuepoints = item.get('cuepoints')
            yield {
                'id': video_id,
                'title': item.get('title'),
                'composer': item.get('name_composer'),
                'url': m3u8_url,
                'formats': formats,
                'duration': item.get('duration_total'),
                'timestamp': traverse_obj(item, ('date', 'published')),
                'description': item.get('short_description') or stream_info.get('short_description'),
                **kwargs,
                'chapters': [{
                    'start_time': chapter.get('time'),
                    # try_get absorbs a missing 'time' or 'duration' key.
                    'end_time': try_get(chapter, lambda x: x['time'] + x['duration']),
                    'title': chapter.get('text'),
                } for chapter in cuepoints] if cuepoints else None,
            }

    def _real_extract(self, url):
        """Build the playlist result for a concert URL.

        The web page is fetched only to find a thumbnail; all other metadata
        comes from the ``/v2/concert/<id>`` API endpoint.
        """
        language, video_id = self._match_valid_url(url).group('language', 'id')
        if not language:
            language = 'en'

        # The thumbnail is optional metadata: a page that no longer matches
        # the pattern must not abort the whole extraction.
        thumbnail_url = self._html_search_regex(
            r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
            self._download_webpage(url, video_id), 'thumbnail', fatal=False)
        thumbnails = [{
            'url': thumbnail_url,
            **parse_resolution(thumbnail_url)
        }] if thumbnail_url else []

        vid_info = self._download_json(
            f'https://api.digitalconcerthall.com/v2/concert/{video_id}', video_id, headers={
                'Accept': 'application/json',
                'Accept-Language': language
            })
        # Joins to an empty string when no artist names are present.
        album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')

        return {
            '_type': 'playlist',
            'id': video_id,
            'title': vid_info.get('title'),
            'entries': self._entries(traverse_obj(vid_info, ('_embedded', ..., ...)), language,
                                     thumbnails=thumbnails, album_artist=album_artist),
            'thumbnails': thumbnails,
            'album_artist': album_artist,
        }