]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/bandcamp.py
[extractors] Use new framework for existing embeds (#4307)
[yt-dlp.git] / yt_dlp / extractor / bandcamp.py
index 5863eaeca625d95f07e8d229e6d3f444e8ce206d..b34fcb10817b7f9554a6b600f22f2df4ae546788 100644 (file)
@@ -22,6 +22,7 @@
 
 class BandcampIE(InfoExtractor):
     _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+    _EMBED_REGEX = [r'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
     _TESTS = [{
         'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
         'md5': 'c557841d5e50261777a6585648adf439',
@@ -436,7 +437,7 @@ def _real_extract(self, url):
         uploader = self._match_id(url)
         webpage = self._download_webpage(url, uploader)
 
-        discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
+        discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
                             or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
 
         return self.playlist_from_matches(