jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/yandexmusic.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / yandexmusic.py
index c30c438f8b941854fe97284d0003524704814a18..12cc5ca28eea174c8fd20f7e8a7dbb2f2d063ab6 100644 (file)
@@ -1,16 +1,11 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import hashlib
 import itertools
-import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
     ExtractorError,
-    int_or_none,
     float_or_none,
+    int_or_none,
     try_get,
 )
 
@@ -39,19 +34,19 @@ def _raise_captcha():
             expected=True)
 
     def _download_webpage_handle(self, *args, **kwargs):
-        webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
+        webpage = super()._download_webpage_handle(*args, **kwargs)
         if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage:
             self._raise_captcha()
         return webpage
 
     def _download_json(self, *args, **kwargs):
-        response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
+        response = super()._download_json(*args, **kwargs)
         self._handle_error(response)
         return response
 
     def _call_api(self, ep, tld, url, item_id, note, query):
         return self._download_json(
-            'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep),
+            f'https://music.yandex.{tld}/handlers/{ep}.jsx',
             item_id, note,
             fatal=False,
             headers={
@@ -65,7 +60,7 @@ def _call_api(self, ep, tld, url, item_id, note, query):
 class YandexMusicTrackIE(YandexMusicBaseIE):
     IE_NAME = 'yandexmusic:track'
     IE_DESC = 'Яндекс.Музыка - Трек'
-    _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://music.yandex.ru/album/540508/track/4878838',
@@ -109,25 +104,24 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
 
         track = self._call_api(
             'track', tld, url, track_id, 'Downloading track JSON',
-            {'track': '%s:%s' % (track_id, album_id)})['track']
+            {'track': f'{track_id}:{album_id}'})['track']
         track_title = track['title']
 
         download_data = self._download_json(
-            'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
-            track_id, 'Downloading track location url JSON',
-            headers={'X-Retpath-Y': url})
+            f'https://music.yandex.ru/api/v2.1/handlers/track/{track_id}:{album_id}/web-album_track-track-track-main/download/m',
+            track_id, 'Downloading track location url JSON', query={'hq': 1}, headers={'X-Retpath-Y': url})
 
         fd_data = self._download_json(
             download_data['src'], track_id,
             'Downloading track location JSON',
             query={'format': 'json'})
-        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
-        f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
+        key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode()).hexdigest()
+        f_url = 'http://{}/get-mp3/{}/{}?track-id={} '.format(fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
 
         thumbnail = None
         cover_uri = track.get('albums', [{}])[0].get('coverUri')
@@ -156,7 +150,7 @@ def extract_artist_name(artist):
             for element in decomposed:
                 if isinstance(element, dict) and element.get('name'):
                     parts.append(element['name'])
-                elif isinstance(element, compat_str):
+                elif isinstance(element, str):
                     parts.append(element)
             return ''.join(parts)
 
@@ -188,7 +182,7 @@ def extract_artist(artist_list):
         if track_artist:
             track_info.update({
                 'artist': track_artist,
-                'title': '%s - %s' % (track_artist, track_title),
+                'title': f'{track_artist} - {track_title}',
             })
         else:
             track_info['title'] = track_title
@@ -199,14 +193,14 @@ def extract_artist(artist_list):
 class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
     def _extract_tracks(self, source, item_id, url, tld):
         tracks = source['tracks']
-        track_ids = [compat_str(track_id) for track_id in source['trackIds']]
+        track_ids = [str(track_id) for track_id in source['trackIds']]
 
         # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
         # missing tracks should be retrieved manually.
         if len(tracks) < len(track_ids):
-            present_track_ids = set([
-                compat_str(track['id'])
-                for track in tracks if track.get('id')])
+            present_track_ids = {
+                str(track['id'])
+                for track in tracks if track.get('id')}
             missing_track_ids = [
                 track_id for track_id in track_ids
                 if track_id not in present_track_ids]
@@ -220,10 +214,10 @@ def _extract_tracks(self, source, item_id, url, tld):
                 assert missing_track_ids_req
                 missing_tracks = self._call_api(
                     'track-entries', tld, url, item_id,
-                    'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), {
+                    f'Downloading missing tracks JSON chunk {chunk_num + 1}', {
                         'entries': ','.join(missing_track_ids_req),
                         'lang': tld,
-                        'external-domain': 'music.yandex.%s' % tld,
+                        'external-domain': f'music.yandex.{tld}',
                         'overembed': 'false',
                         'strict': 'true',
                     })
@@ -250,7 +244,7 @@ def _build_playlist(self, tracks):
             if not album_id:
                 continue
             entries.append(self.url_result(
-                'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
+                f'http://music.yandex.ru/album/{album_id}/track/{track_id}',
                 ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
         return entries
 
@@ -258,7 +252,7 @@ def _build_playlist(self, tracks):
 class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
     IE_NAME = 'yandexmusic:album'
     IE_DESC = 'Яндекс.Музыка - Альбом'
-    _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://music.yandex.ru/album/540508',
@@ -288,10 +282,10 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
 
     @classmethod
     def suitable(cls, url):
-        return False if YandexMusicTrackIE.suitable(url) else super(YandexMusicAlbumIE, cls).suitable(url)
+        return False if YandexMusicTrackIE.suitable(url) else super().suitable(url)
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         tld = mobj.group('tld')
         album_id = mobj.group('id')
 
@@ -302,20 +296,20 @@ def _real_extract(self, url):
         entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
 
         title = album['title']
-        artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
+        artist = try_get(album, lambda x: x['artists'][0]['name'], str)
         if artist:
-            title = '%s - %s' % (artist, title)
+            title = f'{artist} - {title}'
         year = album.get('year')
         if year:
-            title += ' (%s)' % year
+            title += f' ({year})'
 
-        return self.playlist_result(entries, compat_str(album['id']), title)
+        return self.playlist_result(entries, str(album['id']), title)
 
 
 class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
     IE_NAME = 'yandexmusic:playlist'
     IE_DESC = 'Яндекс.Музыка - Плейлист'
-    _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE
+    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
@@ -342,7 +336,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         tld = mobj.group('tld')
         user = mobj.group('user')
         playlist_id = mobj.group('id')
@@ -353,7 +347,7 @@ def _real_extract(self, url):
                 'kinds': playlist_id,
                 'light': 'true',
                 'lang': tld,
-                'external-domain': 'music.yandex.%s' % tld,
+                'external-domain': f'music.yandex.{tld}',
                 'overembed': 'false',
             })['playlist']
 
@@ -361,7 +355,7 @@ def _real_extract(self, url):
 
         return self.playlist_result(
             self._build_playlist(tracks),
-            compat_str(playlist_id),
+            str(playlist_id),
             playlist.get('title'), playlist.get('description'))
 
 
@@ -369,24 +363,24 @@ class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
     def _call_artist(self, tld, url, artist_id):
         return self._call_api(
             'artist', tld, url, artist_id,
-            'Downloading artist %s JSON' % self._ARTIST_WHAT, {
+            f'Downloading artist {self._ARTIST_WHAT} JSON', {
                 'artist': artist_id,
                 'what': self._ARTIST_WHAT,
                 'sort': self._ARTIST_SORT or '',
                 'dir': '',
                 'period': '',
                 'lang': tld,
-                'external-domain': 'music.yandex.%s' % tld,
+                'external-domain': f'music.yandex.{tld}',
                 'overembed': 'false',
             })
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         tld = mobj.group('tld')
         artist_id = mobj.group('id')
         data = self._call_artist(tld, url, artist_id)
         tracks = self._extract_tracks(data, artist_id, url, tld)
-        title = try_get(data, lambda x: x['artist']['name'], compat_str)
+        title = try_get(data, lambda x: x['artist']['name'], str)
         return self.playlist_result(
             self._build_playlist(tracks), artist_id, title)
 
@@ -394,7 +388,7 @@ def _real_extract(self, url):
 class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
     IE_NAME = 'yandexmusic:artist:tracks'
     IE_DESC = 'Яндекс.Музыка - Артист - Треки'
-    _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE
+    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/tracks'
 
     _TESTS = [{
         'url': 'https://music.yandex.ru/artist/617526/tracks',
@@ -410,13 +404,13 @@ class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
     _ARTIST_WHAT = 'tracks'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         tld = mobj.group('tld')
         artist_id = mobj.group('id')
         data = self._call_artist(tld, url, artist_id)
         tracks = self._extract_tracks(data, artist_id, url, tld)
-        artist = try_get(data, lambda x: x['artist']['name'], compat_str)
-        title = '%s - %s' % (artist or artist_id, 'Треки')
+        artist = try_get(data, lambda x: x['artist']['name'], str)
+        title = '{} - {}'.format(artist or artist_id, 'Треки')
         return self.playlist_result(
             self._build_playlist(tracks), artist_id, title)
 
@@ -424,7 +418,7 @@ def _real_extract(self, url):
 class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
     IE_NAME = 'yandexmusic:artist:albums'
     IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
-    _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE
+    _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/albums'
 
     _TESTS = [{
         'url': 'https://music.yandex.ru/artist/617526/albums',
@@ -440,7 +434,7 @@ class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
     _ARTIST_WHAT = 'albums'
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         tld = mobj.group('tld')
         artist_id = mobj.group('id')
         data = self._call_artist(tld, url, artist_id)
@@ -452,8 +446,8 @@ def _real_extract(self, url):
             if not album_id:
                 continue
             entries.append(self.url_result(
-                'http://music.yandex.ru/album/%s' % album_id,
+                f'http://music.yandex.ru/album/{album_id}',
                 ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))
-        artist = try_get(data, lambda x: x['artist']['name'], compat_str)
-        title = '%s - %s' % (artist or artist_id, 'Альбомы')
+        artist = try_get(data, lambda x: x['artist']['name'], str)
+        title = '{} - {}'.format(artist or artist_id, 'Альбомы')
         return self.playlist_result(entries, artist_id, title)