[yt-dlp.git] / yt_dlp / extractor / vocaroo.py

from .common import InfoExtractor
from ..utils import (
    HEADRequest,
    float_or_none,
)


class VocarooIE(InfoExtractor):
    """Extractor for Vocaroo audio recordings.

    Handles direct and embed links on vocaroo.com as well as short voca.ro
    links, and resolves them to the MP3 file on Vocaroo's media host.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:vocaroo\.com|voca\.ro)/(?:embed/)?(?P<id>\w+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?vocaroo\.com/embed/.+?)\1']
    _TESTS = [
        {
            'url': 'https://vocaroo.com/1de8yA3LNe77',
            'md5': 'c557841d5e50261777a6585648adf439',
            'info_dict': {
                'id': '1de8yA3LNe77',
                'ext': 'mp3',
                'title': 'Vocaroo video #1de8yA3LNe77',
                'timestamp': 1675059800.370,
                'upload_date': '20230130',
            },
        },
        {
            'url': 'https://vocaroo.com/embed/12WqtjLnpj6g?autoplay=0',
            'only_matching': True,
        },
        {
            'url': 'https://voca.ro/12D52rgpzkB0',
            'only_matching': True,
        },
    ]

    _WEBPAGE_TESTS = [
        {
            'url': 'https://qbnu.github.io/cool.html',
            'md5': 'f322e529275dd8a47994919eeac404a5',
            'info_dict': {
                'id': '19cgWmKO6AmC',
                'ext': 'mp3',
                'title': 'Vocaroo video #19cgWmKO6AmC',
                'timestamp': 1675093841.408,
                'upload_date': '20230130',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the recording referenced by *url*.

        Issues a single HEAD request against the media URL to read the upload
        timestamp header; the audio itself is not downloaded here.
        """
        audio_id = self._match_id(url)

        # IDs that are 10 characters long, or 12 characters long and starting
        # with '1', are requested from the "media1" host; all others from
        # "media". NOTE(review): presumably mirrors how the site shards its
        # storage - confirm against the site's player code.
        uses_media1 = len(audio_id) == 10 or (len(audio_id) == 12 and audio_id[0] == '1')
        media_subdomain = 'media1' if uses_media1 else 'media'

        media_url = f'https://{media_subdomain}.vocaroo.com/mp3/{audio_id}'
        # The same Referer is sent with the HEAD probe and handed on to the
        # downloader through 'http_headers'.
        http_headers = {'Referer': 'https://vocaroo.com/'}
        resp = self._request_webpage(HEADRequest(media_url), audio_id, headers=http_headers)

        # Read the header through the ``headers`` mapping rather than
        # ``getheader()``: the mapping is available on both urllib-style
        # responses and yt-dlp's networking Response, where ``getheader()``
        # is deprecated.
        upload_timestamp = resp.headers.get('x-bz-upload-timestamp')

        return {
            'id': audio_id,
            # Intentionally empty. NOTE(review): the tests expect a generated
            # "Vocaroo video #<id>" title, so the core presumably fills it in.
            'title': '',
            'url': media_url,
            'ext': 'mp3',
            # scale=1000: header value is presumably in milliseconds (the
            # expected test timestamps are fractional seconds).
            'timestamp': float_or_none(upload_timestamp, scale=1000),
            'vcodec': 'none',
            'http_headers': http_headers,
        }
Commit	Line	Data
e4a8b176	1	from .common import InfoExtractor
	2	from ..utils import (
	3	HEADRequest,
	4	float_or_none,
	5	)
	6
	7
	8	class VocarooIE(InfoExtractor):
	9	_VALID_URL = r'https?://(?:www\.)?(?:vocaroo\.com|voca\.ro)/(?:embed/)?(?P<id>\w+)'
	10	_EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?vocaroo\.com/embed/.+?)\1']
	11	_TESTS = [
	12	{
	13	'url': 'https://vocaroo.com/1de8yA3LNe77',
	14	'md5': 'c557841d5e50261777a6585648adf439',
	15	'info_dict': {
	16	'id': '1de8yA3LNe77',
	17	'ext': 'mp3',
	18	'title': 'Vocaroo video #1de8yA3LNe77',
	19	'timestamp': 1675059800.370,
	20	'upload_date': '20230130',
	21	},
	22	},
	23	{
	24	'url': 'https://vocaroo.com/embed/12WqtjLnpj6g?autoplay=0',
	25	'only_matching': True,
	26	},
	27	{
	28	'url': 'https://voca.ro/12D52rgpzkB0',
	29	'only_matching': True,
	30	},
	31	]
	32
	33	_WEBPAGE_TESTS = [
	34	{
	35	'url': 'https://qbnu.github.io/cool.html',
	36	'md5': 'f322e529275dd8a47994919eeac404a5',
	37	'info_dict': {
	38	'id': '19cgWmKO6AmC',
	39	'ext': 'mp3',
	40	'title': 'Vocaroo video #19cgWmKO6AmC',
	41	'timestamp': 1675093841.408,
	42	'upload_date': '20230130',
	43	},
	44	},
	45	]
	46
	47	def _real_extract(self, url):
	48	audio_id = self._match_id(url)
	49	if len(audio_id) == 10 or (len(audio_id) == 12 and audio_id[0] == '1'):
	50	media_subdomain = 'media1'
	51	else:
	52	media_subdomain = 'media'
	53
	54	url = f'https://{media_subdomain}.vocaroo.com/mp3/{audio_id}'
	55	http_headers = {'Referer': 'https://vocaroo.com/'}
	56	resp = self._request_webpage(HEADRequest(url), audio_id, headers=http_headers)
	57	return {
	58	'id': audio_id,
	59	'title': '',
	60	'url': url,
	61	'ext': 'mp3',
	62	'timestamp': float_or_none(resp.getheader('x-bz-upload-timestamp'), scale=1000),
	63	'vcodec': 'none',
	64	'http_headers': http_headers,
65	}