yt_dlp/extractor/soundgasm.py

   1 import re
   2
   3 from .common import InfoExtractor
   4
   5
   6 class SoundgasmIE(InfoExtractor):
   7     IE_NAME = 'soundgasm'
   8     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
   9     _TEST = {
  10         'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
  11         'md5': '010082a2c802c5275bb00030743e75ad',
  12         'info_dict': {
  13             'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
  14             'ext': 'm4a',
  15             'title': 'Piano sample',
  16             'description': 'Royalty Free Sample Music',
  17             'uploader': 'ytdl',
  18         },
  19     }
  20
  21     def _real_extract(self, url):
  22         mobj = self._match_valid_url(url)
  23         display_id = mobj.group('display_id')
  24
  25         webpage = self._download_webpage(url, display_id)
  26
  27         audio_url = self._html_search_regex(
  28             r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
  29             'audio URL', group='url')
  30
  31         title = self._search_regex(
  32             r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
  33             webpage, 'title', default=display_id)
  34
  35         description = self._html_search_regex(
  36             (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
  37              r'(?s)<li>Description:\s(.*?)<\/li>'),
  38             webpage, 'description', fatal=False)
  39
  40         audio_id = self._search_regex(
  41             r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
  42
  43         return {
  44             'id': audio_id,
  45             'display_id': display_id,
  46             'url': audio_url,
  47             'vcodec': 'none',
  48             'title': title,
  49             'description': description,
  50             'uploader': mobj.group('user'),
  51         }
  52
  53
  54 class SoundgasmProfileIE(InfoExtractor):
  55     IE_NAME = 'soundgasm:profile'
  56     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
  57     _TEST = {
  58         'url': 'http://soundgasm.net/u/ytdl',
  59         'info_dict': {
  60             'id': 'ytdl',
  61         },
  62         'playlist_count': 1,
  63     }
  64
  65     def _real_extract(self, url):
  66         profile_id = self._match_id(url)
  67
  68         webpage = self._download_webpage(url, profile_id)
  69
  70         entries = [
  71             self.url_result(audio_url, 'Soundgasm')
  72             for audio_url in re.findall(rf'href="([^"]+/u/{profile_id}/[^"]+)', webpage)]
  73
  74         return self.playlist_result(entries, profile_id)