yt_dlp/extractor/clyp.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     float_or_none,
   4     parse_qs,
   5     unified_timestamp,
   6 )
   7
   8
   9 class ClypIE(InfoExtractor):
  10     _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)'
  11     _TESTS = [{
  12         'url': 'https://clyp.it/iynkjk4b',
  13         'md5': '4bc6371c65210e7b372097fce4d92441',
  14         'info_dict': {
  15             'id': 'iynkjk4b',
  16             'ext': 'ogg',
  17             'title': 'research',
  18             'description': '#Research',
  19             'duration': 51.278,
  20             'timestamp': 1435524981,
  21             'upload_date': '20150628',
  22         },
  23     }, {
  24         'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d',
  25         'info_dict': {
  26             'id': 'b04p1odi',
  27             'ext': 'ogg',
  28             'title': 'GJ! (Reward Edit)',
  29             'description': 'Metal Resistance (THE ONE edition)',
  30             'duration': 177.789,
  31             'timestamp': 1528241278,
  32             'upload_date': '20180605',
  33         },
  34         'params': {
  35             'skip_download': True,
  36         },
  37     }, {
  38         'url': 'https://clyp.it/v42214lc',
  39         'md5': '4aca4dfc3236fb6d6ddc4ea08314f33f',
  40         'info_dict': {
  41             'id': 'v42214lc',
  42             'ext': 'wav',
  43             'title': 'i dont wanna go (old version)',
  44             'duration': 113.528,
  45             'timestamp': 1607348505,
  46             'upload_date': '20201207',
  47         },
  48     }]
  49
  50     def _real_extract(self, url):
  51         audio_id = self._match_id(url)
  52
  53         qs = parse_qs(url)
  54         token = qs.get('token', [None])[0]
  55
  56         query = {}
  57         if token:
  58             query['token'] = token
  59
  60         metadata = self._download_json(
  61             f'https://api.clyp.it/{audio_id}', audio_id, query=query)
  62
  63         formats = []
  64         for secure in ('', 'Secure'):
  65             for ext in ('Ogg', 'Mp3'):
  66                 format_id = f'{secure}{ext}'
  67                 format_url = metadata.get(f'{format_id}Url')
  68                 if format_url:
  69                     formats.append({
  70                         'url': format_url,
  71                         'format_id': format_id,
  72                         'vcodec': 'none',
  73                         'acodec': ext.lower(),
  74                     })
  75
  76         page = self._download_webpage(url, video_id=audio_id)
  77         wav_url = self._html_search_regex(
  78             r'var\s*wavStreamUrl\s*=\s*["\'](?P<url>https?://[^\'"]+)', page, 'url', default=None)
  79         if wav_url:
  80             formats.append({
  81                 'url': wav_url,
  82                 'format_id': 'wavStreamUrl',
  83                 'vcodec': 'none',
  84                 'acodec': 'wav',
  85             })
  86
  87         title = metadata['Title']
  88         description = metadata.get('Description')
  89         duration = float_or_none(metadata.get('Duration'))
  90         timestamp = unified_timestamp(metadata.get('DateCreated'))
  91
  92         return {
  93             'id': audio_id,
  94             'title': title,
  95             'description': description,
  96             'duration': duration,
  97             'timestamp': timestamp,
  98             'formats': formats,
  99         }