[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)

[yt-dlp.git] / yt_dlp / extractor / jamendo.py
diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py

index 490efa8fb4aadcbf626295338fbd8c40e818d32d..8557a81ad454f89224b420124b4c118be92bd539 100644 (file)
--- a/yt_dlp/extractor/jamendo.py
+++ b/yt_dlp/extractor/jamendo.py
@@ -1,11 +1,8 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import hashlib
  import random
  
-from ..compat import compat_str
  from .common import InfoExtractor
+from ..compat import compat_str
  from ..utils import (
      clean_html,
      int_or_none,
@@ -29,34 +26,54 @@ class JamendoIE(InfoExtractor):
              'id': '196219',
              'display_id': 'stories-from-emona-i',
              'ext': 'flac',
-            'title': 'Maya Filipič - Stories from Emona I',
+            # 'title': 'Maya Filipič - Stories from Emona I',
+            'title': 'Stories from Emona I',
              'artist': 'Maya Filipič',
+            'album': 'Between two worlds',
              'track': 'Stories from Emona I',
              'duration': 210,
-            'thumbnail': r're:^https?://.*\.jpg',
+            'thumbnail': 'https://usercontent.jamendo.com?type=album&id=29279&width=300&trackid=196219',
              'timestamp': 1217438117,
              'upload_date': '20080730',
+            'license': 'by-nc-nd',
+            'view_count': int,
+            'like_count': int,
+            'average_rating': int,
+            'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
          }
      }, {
          'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
          'only_matching': True,
      }]
  
+    def _call_api(self, resource, resource_id, fatal=True):
+        """Fetch a single record from the Jamendo web API.
+
+        Requests ``https://www.jamendo.com/api/<resource>s`` with
+        ``id[]=<resource_id>`` and returns the first element of the JSON
+        response.
+
+        @param resource     singular resource name; an 's' is appended to form the path
+        @param resource_id  ID sent as the ``id[]`` query value (also used as the download's video_id)
+        @param fatal        forwarded to ``_download_json``
+        @returns            the first element of the response; ``{}`` when ``fatal``
+                            is false and the response is missing or empty
+        """
+        path = '/api/%ss' % resource
+        # The X-Jam-Call header is '$<sha1 hex of path + rand>*<rand>~'
+        rand = compat_str(random.random())
+        response = self._download_json(
+            'https://www.jamendo.com' + path, resource_id, fatal=fatal, query={
+                'id[]': resource_id,
+            }, headers={
+                'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
+            })
+        # A non-fatal lookup must not crash on a failed/empty response: callers
+        # use `.get()` on the result, so hand them an empty dict instead.
+        if not fatal and not response:
+            return {}
+        return response[0]
+
      def _real_extract(self, url):
-        track_id, display_id = self._VALID_URL_RE.match(url).groups()
-        webpage = self._download_webpage(
-            'https://www.jamendo.com/track/' + track_id, track_id)
-        models = self._parse_json(self._html_search_regex(
-            r"data-bundled-models='([^']+)",
-            webpage, 'bundled models'), track_id)
-        track = models['track']['models'][0]
+        track_id, display_id = self._match_valid_url(url).groups()
+        # webpage = self._download_webpage(
+        #     'https://www.jamendo.com/track/' + track_id, track_id)
+        # models = self._parse_json(self._html_search_regex(
+        #     r"data-bundled-models='([^']+)",
+        #     webpage, 'bundled models'), track_id)
+        # track = models['track']['models'][0]
+        track = self._call_api('track', track_id)
          title = track_name = track['name']
-        get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
-        artist = get_model('artist')
-        artist_name = artist.get('name')
-        if artist_name:
-            title = '%s - %s' % (artist_name, title)
-        album = get_model('album')
+        # get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
+        # artist = get_model('artist')
+        # artist_name = artist.get('name')
+        # if artist_name:
+        #     title = '%s - %s' % (artist_name, title)
+        # album = get_model('album')
+        artist = self._call_api("artist", track.get('artistId'), fatal=False)
+        album = self._call_api("album", track.get('albumId'), fatal=False)
  
          formats = [{
              'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
@@ -70,11 +87,10 @@ def _real_extract(self, url):
              ('ogg1', 'ogg', 'ogg'),
              ('flac', 'flac', 'flac'),
          ))]
-        self._sort_formats(formats)
  
          urls = []
          thumbnails = []
-        for _, covers in track.get('cover', {}).items():
+        for covers in (track.get('cover') or {}).values():
              for cover_id, cover_url in covers.items():
                  if not cover_url or cover_url in urls:
                      continue
@@ -88,13 +104,14 @@ def _real_extract(self, url):
                  })
  
          tags = []
-        for tag in track.get('tags', []):
+        for tag in (track.get('tags') or []):
              tag_name = tag.get('name')
              if not tag_name:
                  continue
              tags.append(tag_name)
  
          stats = track.get('stats') or {}
+        license = track.get('licenseCC') or []
  
          return {
              'id': track_id,
@@ -103,11 +120,11 @@ def _real_extract(self, url):
              'title': title,
              'description': track.get('description'),
              'duration': int_or_none(track.get('duration')),
-            'artist': artist_name,
+            'artist': artist.get('name'),
              'track': track_name,
              'album': album.get('name'),
              'formats': formats,
-            'license': '-'.join(track.get('licenseCC', [])) or None,
+            'license': '-'.join(license) if license else None,
              'timestamp': int_or_none(track.get('dateCreated')),
              'view_count': int_or_none(stats.get('listenedAll')),
              'like_count': int_or_none(stats.get('favorited')),
@@ -116,9 +133,9 @@ def _real_extract(self, url):
          }
  
  
-class JamendoAlbumIE(InfoExtractor):
+class JamendoAlbumIE(JamendoIE):  # XXX: Do not subclass from concrete IE
      _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
-    _TEST = {
+    _TESTS = [{
          'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
          'info_dict': {
              'id': '121486',
@@ -130,38 +147,44 @@ class JamendoAlbumIE(InfoExtractor):
              'info_dict': {
                  'id': '1032333',
                  'ext': 'flac',
-                'title': 'Shearer - Warmachine',
+                'title': 'Warmachine',
                  'artist': 'Shearer',
                  'track': 'Warmachine',
                  'timestamp': 1368089771,
                  'upload_date': '20130509',
+                'view_count': int,
+                'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032333',
+                'duration': 190,
+                'license': 'by',
+                'album': 'Duck On Cover',
+                'average_rating': 4,
+                'tags': ['rock', 'drums', 'bass', 'world', 'punk', 'neutral'],
+                'like_count': int,
              }
          }, {
              'md5': '1f358d7b2f98edfe90fd55dac0799d50',
              'info_dict': {
                  'id': '1032330',
                  'ext': 'flac',
-                'title': 'Shearer - Without Your Ghost',
+                'title': 'Without Your Ghost',
                  'artist': 'Shearer',
                  'track': 'Without Your Ghost',
                  'timestamp': 1368089771,
                  'upload_date': '20130509',
+                'duration': 192,
+                'tags': ['rock', 'drums', 'bass', 'world', 'punk'],
+                'album': 'Duck On Cover',
+                'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032330',
+                'view_count': int,
+                'average_rating': 4,
+                'license': 'by',
+                'like_count': int,
              }
          }],
          'params': {
              'playlistend': 2
          }
-    }
-
-    def _call_api(self, resource, resource_id):
-        path = '/api/%ss' % resource
-        rand = compat_str(random.random())
-        return self._download_json(
-            'https://www.jamendo.com' + path, resource_id, query={
-                'id[]': resource_id,
-            }, headers={
-                'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
-            })[0]
+    }]
  
      def _real_extract(self, url):
          album_id = self._match_id(url)
@@ -169,7 +192,7 @@ def _real_extract(self, url):
          album_name = album.get('name')
  
          entries = []
-        for track in album.get('tracks', []):
+        for track in (album.get('tracks') or []):
              track_id = track.get('id')
              if not track_id:
                  continue