]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/vocaroo.py
[extractor/vocaroo] Add extractor (#6117)
[yt-dlp.git] / yt_dlp / extractor / vocaroo.py
CommitLineData
e4a8b176 1from .common import InfoExtractor
2from ..utils import (
3 HEADRequest,
4 float_or_none,
5)
6
7
class VocarooIE(InfoExtractor):
    """Extractor for Vocaroo recordings.

    Matches ``vocaroo.com`` and ``voca.ro`` links (including ``/embed/``
    ones) and resolves them to the MP3 file served from a Vocaroo media
    subdomain.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:vocaroo\.com|voca\.ro)/(?:embed/)?(?P<id>\w+)'
    # Picks up Vocaroo embed iframes found on third-party pages.
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?vocaroo\.com/embed/.+?)\1']
    _TESTS = [{
        'url': 'https://vocaroo.com/1de8yA3LNe77',
        'md5': 'c557841d5e50261777a6585648adf439',
        'info_dict': {
            'id': '1de8yA3LNe77',
            'ext': 'mp3',
            'title': 'Vocaroo video #1de8yA3LNe77',
            'timestamp': 1675059800.370,
            'upload_date': '20230130',
        },
    }, {
        'url': 'https://vocaroo.com/embed/12WqtjLnpj6g?autoplay=0',
        'only_matching': True,
    }, {
        'url': 'https://voca.ro/12D52rgpzkB0',
        'only_matching': True,
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://qbnu.github.io/cool.html',
        'md5': 'f322e529275dd8a47994919eeac404a5',
        'info_dict': {
            'id': '19cgWmKO6AmC',
            'ext': 'mp3',
            'title': 'Vocaroo video #19cgWmKO6AmC',
            'timestamp': 1675093841.408,
            'upload_date': '20230130',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the recording referenced by *url*.

        The media URL is derived from the recording ID alone; a HEAD
        request is then issued against it to read the upload-timestamp
        response header.
        """
        audio_id = self._match_id(url)

        # 10-character IDs, and 12-character IDs starting with '1', are
        # served from "media1"; every other ID goes to "media".
        id_length = len(audio_id)
        uses_media1 = id_length == 10 or (id_length == 12 and audio_id.startswith('1'))
        media_subdomain = 'media1' if uses_media1 else 'media'
        media_url = f'https://{media_subdomain}.vocaroo.com/mp3/{audio_id}'

        # The same Referer is sent with the probe and handed on with the
        # result via 'http_headers'.
        http_headers = {'Referer': 'https://vocaroo.com/'}
        head_response = self._request_webpage(
            HEADRequest(media_url), audio_id, headers=http_headers)

        # NOTE(review): scale=1000 suggests the header carries milliseconds
        # since the epoch -- confirm against the storage backend's docs.
        upload_timestamp = float_or_none(
            head_response.getheader('x-bz-upload-timestamp'), scale=1000)

        return {
            'id': audio_id,
            # Deliberately empty: the tests above expect a title of the form
            # 'Vocaroo video #<id>', presumably generated downstream.
            'title': '',
            'url': media_url,
            'ext': 'mp3',
            'timestamp': upload_timestamp,
            'vcodec': 'none',
            'http_headers': http_headers,
        }