yt_dlp/extractor/freesound.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     float_or_none,
   6     get_element_by_class,
   7     get_element_by_id,
   8     unified_strdate,
   9 )
  10
  11
  12 class FreesoundIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?freesound\.org/people/[^/]+/sounds/(?P<id>[^/]+)'
  14     _TEST = {
  15         'url': 'http://www.freesound.org/people/miklovan/sounds/194503/',
  16         'md5': '12280ceb42c81f19a515c745eae07650',
  17         'info_dict': {
  18             'id': '194503',
  19             'ext': 'mp3',
  20             'title': 'gulls in the city.wav',
  21             'description': 'the sounds of seagulls in the city',
  22             'duration': 130.233,
  23             'uploader': 'miklovan',
  24             'upload_date': '20130715',
  25             'tags': list,
  26         },
  27     }
  28
  29     def _real_extract(self, url):
  30         audio_id = self._match_id(url)
  31
  32         webpage = self._download_webpage(url, audio_id)
  33
  34         audio_url = self._og_search_property('audio', webpage, 'song url')
  35         title = self._og_search_property('audio:title', webpage, 'song title')
  36
  37         description = self._html_search_regex(
  38             r'(?s)id=["\']sound_description["\'][^>]*>(.+?)</div>',
  39             webpage, 'description', fatal=False)
  40
  41         duration = float_or_none(
  42             get_element_by_class('duration', webpage), scale=1000)
  43
  44         upload_date = unified_strdate(get_element_by_id('sound_date', webpage))
  45         uploader = self._og_search_property(
  46             'audio:artist', webpage, 'uploader', fatal=False)
  47
  48         channels = self._html_search_regex(
  49             r'Channels</dt><dd>(.+?)</dd>', webpage,
  50             'channels info', fatal=False)
  51
  52         tags_str = get_element_by_class('tags', webpage)
  53         tags = re.findall(r'<a[^>]+>([^<]+)', tags_str) if tags_str else None
  54
  55         audio_url = re.sub(r'^https?://freesound\.org(https?://)', r'\1', audio_url)
  56         audio_urls = [audio_url]
  57
  58         LQ_FORMAT = '-lq.mp3'
  59         if LQ_FORMAT in audio_url:
  60             audio_urls.append(audio_url.replace(LQ_FORMAT, '-hq.mp3'))
  61
  62         formats = [{
  63             'url': format_url,
  64             'format_note': channels,
  65             'quality': quality,
  66         } for quality, format_url in enumerate(audio_urls)]
  67
  68         return {
  69             'id': audio_id,
  70             'title': title,
  71             'description': description,
  72             'duration': duration,
  73             'uploader': uploader,
  74             'upload_date': upload_date,
  75             'tags': tags,
  76             'formats': formats,
  77         }