[extractors] Use new framework for existing embeds (#4307)

[yt-dlp.git] / yt_dlp / extractor / bandcamp.py
diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py

index 5863eaeca625d95f07e8d229e6d3f444e8ce206d..b34fcb10817b7f9554a6b600f22f2df4ae546788 100644 (file)
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@@ -22,6 +22,7 @@
  
  class BandcampIE(InfoExtractor):
      _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
+    _EMBED_REGEX = [r'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
      _TESTS = [{
          'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
          'md5': 'c557841d5e50261777a6585648adf439',
@@ -436,7 +437,7 @@ def _real_extract(self, url):
          uploader = self._match_id(url)
          webpage = self._download_webpage(url, uploader)
  
-        discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
+        discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage)
                              or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
  
          return self.playlist_from_matches(