[yt-dlp.git] / youtube_dl / extractor / tapely.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    ExtractorError,
    float_or_none,
    parse_iso8601,
    sanitized_Request,
)


class TapelyIE(InfoExtractor):
    """Extractor for tape.ly / tapely.com mixtapes.

    A URL of the form ``/<tape>`` is returned as a playlist of all songs on
    the tape; a URL of the form ``/<tape>/<n>`` returns only the n-th
    (1-based) song of that tape.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:tape\.ly|tapely\.com)/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
    # URL templates. ``{0:}`` is filled with the tape's display id (API) or
    # with the song's ``filename`` field (S3 / SoundCloud).
    _API_URL = 'http://tape.ly/showtape?id={0:}'
    _S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
    _SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}'
    _TESTS = [
        {
            'url': 'http://tape.ly/my-grief-as-told-by-water',
            'info_dict': {
                'id': 23952,
                'title': 'my grief as told by water',
                # Raw string: ``\.`` is a regex escape, not a string escape.
                'thumbnail': r're:^https?://.*\.png$',
                'uploader_id': 16484,
                'timestamp': 1411848286,
                'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.',
            },
            'playlist_count': 13,
        },
        {
            'url': 'http://tape.ly/my-grief-as-told-by-water/1',
            'md5': '79031f459fdec6530663b854cbc5715c',
            'info_dict': {
                'id': 258464,
                'title': 'Dreaming Awake  (My Brightest Diamond)',
                'ext': 'm4a',
            },
        },
        {
            'url': 'https://tapely.com/my-grief-as-told-by-water',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Fetch the tape's JSON description and build the result.

        Returns a single song entry when the URL carries a song number,
        otherwise a playlist dict holding every recognised song.

        Raises ExtractorError (expected) when the requested song number
        does not refer to an extracted song.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')

        # The request is sent with XHR-style headers and the page URL as
        # referer, asking for a JSON response.
        request = sanitized_Request(self._API_URL.format(display_id))
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        request.add_header('Accept', 'application/json')
        request.add_header('Referer', url)

        playlist = self._download_json(request, display_id)
        tape = playlist['tape']

        entries = []
        for item in tape['songs']:
            song = item['song']
            source = song['source']
            entry = {
                'id': song['id'],
                # NOTE(review): the 1000 divisor suggests the API reports
                # milliseconds - confirm against a live response.
                'duration': float_or_none(song.get('songduration'), 1000),
                'title': song['title'],
            }
            if source == 'S3':
                # Directly hosted file: the entry itself is downloadable.
                entry['url'] = self._S3_SONG_URL.format(song['filename'])
            elif source == 'YT':
                self.to_screen('YouTube video detected')
                yt_id = song['filename'].replace('/youtube/', '')
                # Hand the video over to the Youtube extractor.
                entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id))
            elif source == 'SC':
                self.to_screen('SoundCloud song detected')
                sc_url = self._SOUNDCLOUD_SONG_URL.format(song['filename'])
                # Hand the track over to the Soundcloud extractor.
                entry.update(self.url_result(sc_url, 'Soundcloud'))
            else:
                # Songs from unknown sources are skipped, not fatal.
                self.report_warning('Unknown song source: %s' % source)
                continue
            entries.append(entry)

        songnr = mobj.group('songnr')
        if songnr:
            index = int(songnr) - 1
            # Explicit bounds check: a plain entries[index] would accept
            # index -1 (URL ending in "/0") and silently return the last
            # song instead of reporting an invalid song number.
            if not 0 <= index < len(entries):
                raise ExtractorError(
                    'No song with index: %s' % songnr,
                    expected=True)
            return entries[index]

        return {
            '_type': 'playlist',
            'id': tape['id'],
            'display_id': display_id,
            'title': tape['name'],
            'entries': entries,
            'thumbnail': tape.get('image_url'),
            'description': clean_html(tape.get('subtext')),
            'like_count': tape.get('likescount'),
            'uploader_id': tape.get('user_id'),
            'timestamp': parse_iso8601(tape.get('published_at')),
        }
Commit	Line	Data
e80f40e5 NJ	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
e80f40e5	8	clean_html,
1cc79574	9	ExtractorError,
e80f40e5 NJ	10	float_or_none,
e80f40e5 NJ	11	parse_iso8601,
5c2266df	12	sanitized_Request,
e80f40e5 NJ	13	)
	14
	15
	16	class TapelyIE(InfoExtractor):
60d23e5e	17	_VALID_URL = r'https?://(?:www\.)?(?:tape\.ly\|tapely\.com)/(?P<id>[A-Za-z0-9\-_]+)(?:/(?P<songnr>\d+))?'
e80f40e5 NJ	18	_API_URL = 'http://tape.ly/showtape?id={0:}'
e80f40e5 NJ	19	_S3_SONG_URL = 'http://mytape.s3.amazonaws.com/{0:}'
9e77c60c	20	_SOUNDCLOUD_SONG_URL = 'http://api.soundcloud.com{0:}'
e80f40e5 NJ	21	_TESTS = [
	22	{
	23	'url': 'http://tape.ly/my-grief-as-told-by-water',
	24	'info_dict': {
	25	'id': 23952,
	26	'title': 'my grief as told by water',
	27	'thumbnail': 're:^https?://.*\.png$',
	28	'uploader_id': 16484,
	29	'timestamp': 1411848286,
	30	'description': 'For Robin and Ponkers, whom the tides of life have taken out to sea.',
	31	},
	32	'playlist_count': 13,
	33	},
	34	{
	35	'url': 'http://tape.ly/my-grief-as-told-by-water/1',
	36	'md5': '79031f459fdec6530663b854cbc5715c',
	37	'info_dict': {
	38	'id': 258464,
	39	'title': 'Dreaming Awake (My Brightest Diamond)',
	40	'ext': 'm4a',
	41	},
	42	},
60d23e5e NJ	43	{
	44	'url': 'https://tapely.com/my-grief-as-told-by-water',
	45	'only_matching': True,
	46	},
e80f40e5 NJ	47	]
	48
	49	def _real_extract(self, url):
	50	mobj = re.match(self._VALID_URL, url)
	51	display_id = mobj.group('id')
	52
	53	playlist_url = self._API_URL.format(display_id)
5c2266df	54	request = sanitized_Request(playlist_url)
e80f40e5 NJ	55	request.add_header('X-Requested-With', 'XMLHttpRequest')
e80f40e5 NJ	56	request.add_header('Accept', 'application/json')
1a92e086	57	request.add_header('Referer', url)
e80f40e5 NJ	58
	59	playlist = self._download_json(request, display_id)
	60
	61	tape = playlist['tape']
	62
	63	entries = []
	64	for s in tape['songs']:
	65	song = s['song']
	66	entry = {
	67	'id': song['id'],
	68	'duration': float_or_none(song.get('songduration'), 1000),
	69	'title': song['title'],
	70	}
	71	if song['source'] == 'S3':
	72	entry.update({
	73	'url': self._S3_SONG_URL.format(song['filename']),
	74	})
	75	entries.append(entry)
	76	elif song['source'] == 'YT':
9e77c60c NJ	77	self.to_screen('YouTube video detected')
9e77c60c NJ	78	yt_id = song['filename'].replace('/youtube/', '')
e80f40e5 NJ	79	entry.update(self.url_result(yt_id, 'Youtube', video_id=yt_id))
e80f40e5 NJ	80	entries.append(entry)
9e77c60c NJ	81	elif song['source'] == 'SC':
	82	self.to_screen('SoundCloud song detected')
	83	sc_url = self._SOUNDCLOUD_SONG_URL.format(song['filename'])
	84	entry.update(self.url_result(sc_url, 'Soundcloud'))
	85	entries.append(entry)
e80f40e5 NJ	86	else:
	87	self.report_warning('Unknown song source: %s' % song['source'])
	88
	89	if mobj.group('songnr'):
	90	songnr = int(mobj.group('songnr')) - 1
	91	try:
	92	return entries[songnr]
	93	except IndexError:
	94	raise ExtractorError(
	95	'No song with index: %s' % mobj.group('songnr'),
	96	expected=True)
	97
	98	return {
	99	'_type': 'playlist',
	100	'id': tape['id'],
	101	'display_id': display_id,
	102	'title': tape['name'],
	103	'entries': entries,
	104	'thumbnail': tape.get('image_url'),
	105	'description': clean_html(tape.get('subtext')),
	106	'like_count': tape.get('likescount'),
	107	'uploader_id': tape.get('user_id'),
	108	'timestamp': parse_iso8601(tape.get('published_at')),
	109	}