]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/digitalconcerthall.py
[tumblr] Fix 403 errors and handle vimeo embeds (#2542)
[yt-dlp.git] / yt_dlp / extractor / digitalconcerthall.py
CommitLineData
8bcd4048 1# coding: utf-8
2from __future__ import unicode_literals
3
4from .common import InfoExtractor
5
6from ..utils import (
7 ExtractorError,
8 parse_resolution,
9 traverse_obj,
10 try_get,
11 urlencode_postdata,
12)
13
14
class DigitalConcertHallIE(InfoExtractor):
    """Extractor for concert pages on digitalconcerthall.com.

    A concert URL is returned as a playlist; every item embedded in the
    concert's API record becomes one playlist entry. Login is mandatory:
    `_real_initialize` calls `_login`, which raises if no credentials are
    configured, and every stream request carries the bearer token obtained
    there.
    """

    IE_DESC = 'DigitalConcertHall extractor'
    _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/concert/(?P<id>[0-9]+)'
    # Token endpoint used for both the anonymous device token and the
    # username/password login.
    _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
    # Bearer token; filled in per instance by `_login`.
    _ACCESS_TOKEN = None
    _NETRC_MACHINE = 'digitalconcerthall'
    _TESTS = [{
        'note': 'Playlist with only one video',
        'url': 'https://www.digitalconcerthall.com/en/concert/53201',
        'info_dict': {
            'id': '53201-1',
            'ext': 'mp4',
            'composer': 'Kurt Weill',
            'title': '[Magic Night]',
            'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
            'upload_date': '20210624',
            'timestamp': 1624548600,
            'duration': 2798,
            'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'Concert with several works and an interview',
        'url': 'https://www.digitalconcerthall.com/en/concert/53785',
        'info_dict': {
            'id': '53785',
            'album_artist': 'Berliner Philharmoniker / Kirill Petrenko',
            'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
        },
        'params': {'skip_download': 'm3u8'},
        'playlist_count': 3,
    }]

    def _login(self):
        """Obtain an access token and authenticate it with the user's credentials.

        First requests a device token from `_OAUTH_URL` and stores it in
        `self._ACCESS_TOKEN`, then posts the username and password to the
        same endpoint with that token as the Bearer authorization.

        Raises (via `raise_login_required`) when no username is available or
        when the credential request fails with an ExtractorError.
        """
        username, password = self._get_login_info()
        if not username:
            self.raise_login_required()

        form_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        token_response = self._download_json(
            self._OAUTH_URL,
            None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
                'affiliate': 'none',
                'grant_type': 'device',
                'device_vendor': 'unknown',
                'app_id': 'dch.webapp',
                'app_version': '1.0.0',
                'client_secret': '2ySLN+2Fwb',
            }), headers=form_headers)
        self._ACCESS_TOKEN = token_response['access_token']

        try:
            # The response body is not used; only success/failure matters.
            self._download_json(
                self._OAUTH_URL,
                None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
                    'grant_type': 'password',
                    'username': username,
                    'password': password,
                }), headers={
                    'Content-Type': 'application/x-www-form-urlencoded',
                    'Referer': 'https://www.digitalconcerthall.com',
                    'Authorization': f'Bearer {self._ACCESS_TOKEN}'
                })
        except ExtractorError:
            self.raise_login_required(msg='Login info incorrect')

    def _real_initialize(self):
        """Log in before any extraction takes place."""
        self._login()

    def _entries(self, items, language, **kwargs):
        """Yield one info dict per concert item.

        items    -- iterable of item dicts taken from the concert API record;
                    each must provide 'id' and '_links' -> 'streams' -> 'href'
        language -- value sent as the Accept-Language header
        kwargs   -- extra fields merged into every yielded dict (they
                    override the fields listed before them, but not 'chapters')
        """
        for item in items:
            video_id = item['id']
            stream_info = self._download_json(
                self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
                    'Accept': 'application/json',
                    'Authorization': f'Bearer {self._ACCESS_TOKEN}',
                    'Accept-Language': language
                })

            # First URL of the first channel whose key starts with 'vod_mixed'.
            m3u8_url = traverse_obj(
                stream_info, ('channel', lambda x: x.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
            if m3u8_url:
                formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
            else:
                # Nothing to download: do not hand a missing URL to the m3u8
                # downloader (fatal=False only covers download failures).
                # Fall back to the same empty-formats result a failed
                # non-fatal download would give.
                self.report_warning('No stream URL found in stream info', video_id)
                formats = []
            self._sort_formats(formats)

            cuepoints = item.get('cuepoints')
            yield {
                'id': video_id,
                'title': item.get('title'),
                'composer': item.get('name_composer'),
                'url': m3u8_url,
                'formats': formats,
                'duration': item.get('duration_total'),
                'timestamp': traverse_obj(item, ('date', 'published')),
                'description': item.get('short_description') or stream_info.get('short_description'),
                **kwargs,
                'chapters': [{
                    'start_time': chapter.get('time'),
                    # None when either 'time' or 'duration' is missing
                    'end_time': try_get(chapter, lambda x: x['time'] + x['duration']),
                    'title': chapter.get('text'),
                } for chapter in cuepoints] if cuepoints else None,
            }

    def _real_extract(self, url):
        """Build the playlist result for a concert URL.

        Downloads the concert web page (for the thumbnail) and the concert's
        API record (for title, artists and embedded items) and returns a
        playlist whose entries are produced lazily by `_entries`.
        """
        language, video_id = self._match_valid_url(url).group('language', 'id')
        # Defensive default; _VALID_URL as written always captures a language.
        language = language or 'en'

        # The thumbnail is optional metadata, so a page without a matching
        # image must not abort the extraction.
        thumbnail_url = self._html_search_regex(
            r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
            self._download_webpage(url, video_id), 'thumbnail', fatal=False)
        thumbnails = [{
            'url': thumbnail_url,
            **parse_resolution(thumbnail_url)
        }] if thumbnail_url else []

        vid_info = self._download_json(
            f'https://api.digitalconcerthall.com/v2/concert/{video_id}', video_id, headers={
                'Accept': 'application/json',
                'Accept-Language': language
            })
        # Empty string when the record lists no artist names.
        album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '')

        return {
            '_type': 'playlist',
            'id': video_id,
            'title': vid_info.get('title'),
            'entries': self._entries(traverse_obj(vid_info, ('_embedded', ..., ...)), language,
                                     thumbnails=thumbnails, album_artist=album_artist),
            'thumbnails': thumbnails,
            'album_artist': album_artist,
        }