[yt-dlp.git] / yt_dlp / extractor / dctp.py

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    float_or_none,
    int_or_none,
    unified_timestamp,
    url_or_none,
)


class DctpTvIE(InfoExtractor):
    """Extractor for dctp.tv film pages (``/filme/<slug>`` URLs)."""

    # The captured ``id`` group is the URL slug; it is used as display_id,
    # while the real video id is the ``uuid`` reported by the REST API.
    _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # 4x3
        'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
        'md5': '3ffbd1556c3fe210724d7088fad723e3',
        'info_dict': {
            'id': '95eaa4f33dad413aa17b4ee613cccc6c',
            'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
            'ext': 'm4v',
            'title': 'Videoinstallation für eine Kaufhausfassade',
            'description': 'Kurzfilm',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 71.24,
            'timestamp': 1302172322,
            'upload_date': '20110407',
        },
    }, {
        # 16x9
        'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
        'only_matching': True,
    }]

    # Root of the static REST API; the versioned API path is built from
    # version.json at extraction time.
    _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'

    def _real_extract(self, url):
        """Build the info dict for a dctp.tv film page.

        Three JSON documents are fetched in sequence: ``version.json``
        (gives the API version used in the path), the slug document (maps
        the URL slug to an ``object_id``) and the media document (holds the
        actual metadata).

        Raises KeyError if ``version_name``, ``object_id``, ``uuid`` or
        ``title`` is missing from the respective JSON document.
        """
        display_id = self._match_id(url)

        version = self._download_json(
            '%s/version.json' % self._BASE_URL, display_id,
            'Downloading version JSON')

        restapi_base = '%s/%s/restapi' % (
            self._BASE_URL, version['version_name'])

        info = self._download_json(
            '%s/slugs/%s.json' % (restapi_base, display_id), display_id,
            'Downloading video info JSON')

        media = self._download_json(
            '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
            display_id, 'Downloading media JSON')

        uuid = media['uuid']
        title = media['title']
        is_wide = media.get('is_wide')
        formats = []

        def add_formats(suffix):
            # Append the HLS, S3 and CDN variants of one rendition.
            # '%%s' survives the first interpolation as '%s' so that the
            # host name can be filled in per variant below.
            templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
            formats.extend([{
                'format_id': 'hls-' + suffix,
                'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
                # Without an explicit ext it would be derived from the URL
                # and come out as 'm3u8', which is the playlist's extension
                # and not a usable container extension for the download.
                'ext': 'mp4',
                'protocol': 'm3u8_native',
            }, {
                'format_id': 's3-' + suffix,
                'url': templ % 'completed-media.s3.amazonaws.com',
            }, {
                'format_id': 'http-' + suffix,
                'url': templ % 'cdn-media.dctp.tv',
            }])

        # The 0500 rendition is always added, named after the aspect ratio;
        # the 720p rendition is only added for wide media.
        add_formats('0500_' + ('16x9' if is_wide else '4x3'))
        if is_wide:
            add_formats('720p')

        # Collect thumbnails defensively: skip anything that is not a dict
        # carrying a valid URL.
        thumbnails = []
        images = media.get('images')
        if isinstance(images, list):
            for image in images:
                if not isinstance(image, dict):
                    continue
                image_url = url_or_none(image.get('url'))
                if not image_url:
                    continue
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

        return {
            'id': uuid,
            'display_id': display_id,
            'title': title,
            'alt_title': media.get('subtitle'),
            # Fall back to the teaser when there is no (non-empty) description.
            'description': media.get('description') or media.get('teaser'),
            'timestamp': unified_timestamp(media.get('created')),
            # The API reports milliseconds; scale down to seconds.
            'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
            'thumbnails': thumbnails,
            'formats': formats,
        }
Commit	Line	Data
0865f397	1	from .common import InfoExtractor
8e01f3ca S	2	from ..compat import compat_str
	3	from ..utils import (
	4	float_or_none,
acbd0ff5 S	5	int_or_none,
acbd0ff5 S	6	unified_timestamp,
3052a30d	7	url_or_none,
8e01f3ca	8	)
0865f397	9
48a1e514	10
0865f397	11	class DctpTvIE(InfoExtractor):
8e01f3ca	12	_VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
acbd0ff5 S	13	_TESTS = [{
acbd0ff5 S	14	# 4x3
48a1e514	15	'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
b2771a28	16	'md5': '3ffbd1556c3fe210724d7088fad723e3',
48a1e514	17	'info_dict': {
e295618f	18	'id': '95eaa4f33dad413aa17b4ee613cccc6c',
75a4fc5b	19	'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
b2771a28	20	'ext': 'm4v',
e295618f YCH	21	'title': 'Videoinstallation für eine Kaufhausfassade',
e295618f YCH	22	'description': 'Kurzfilm',
ec85ded8	23	'thumbnail': r're:^https?://.*\.jpg$',
8e01f3ca	24	'duration': 71.24,
acbd0ff5 S	25	'timestamp': 1302172322,
acbd0ff5 S	26	'upload_date': '20110407',
8e01f3ca	27	},
acbd0ff5 S	28	}, {
	29	# 16x9
	30	'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
	31	'only_matching': True,
	32	}]
	33
	34	_BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
0865f397 PH	35
0865f397 PH	36	def _real_extract(self, url):
8e01f3ca	37	display_id = self._match_id(url)
e295618f	38
acbd0ff5 S	39	version = self._download_json(
	40	'%s/version.json' % self._BASE_URL, display_id,
	41	'Downloading version JSON')
	42
	43	restapi_base = '%s/%s/restapi' % (
	44	self._BASE_URL, version['version_name'])
0865f397	45
acbd0ff5 S	46	info = self._download_json(
	47	'%s/slugs/%s.json' % (restapi_base, display_id), display_id,
	48	'Downloading video info JSON')
e295618f	49
acbd0ff5 S	50	media = self._download_json(
	51	'%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
	52	display_id, 'Downloading media JSON')
	53
	54	uuid = media['uuid']
	55	title = media['title']
b2771a28 RA	56	is_wide = media.get('is_wide')
	57	formats = []
	58
	59	def add_formats(suffix):
	60	templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
	61	formats.extend([{
	62	'format_id': 'hls-' + suffix,
	63	'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
	64	'protocol': 'm3u8_native',
	65	}, {
	66	'format_id': 's3-' + suffix,
	67	'url': templ % 'completed-media.s3.amazonaws.com',
	68	}, {
	69	'format_id': 'http-' + suffix,
	70	'url': templ % 'cdn-media.dctp.tv',
	71	}])
	72
	73	add_formats('0500_' + ('16x9' if is_wide else '4x3'))
	74	if is_wide:
	75	add_formats('720p')
8e01f3ca	76
acbd0ff5 S	77	thumbnails = []
	78	images = media.get('images')
	79	if isinstance(images, list):
	80	for image in images:
	81	if not isinstance(image, dict):
	82	continue
3052a30d S	83	image_url = url_or_none(image.get('url'))
3052a30d S	84	if not image_url:
acbd0ff5 S	85	continue
	86	thumbnails.append({
	87	'url': image_url,
	88	'width': int_or_none(image.get('width')),
	89	'height': int_or_none(image.get('height')),
	90	})
0865f397 PH	91
0865f397 PH	92	return {
acbd0ff5 S	93	'id': uuid,
acbd0ff5 S	94	'display_id': display_id,
0865f397	95	'title': title,
acbd0ff5 S	96	'alt_title': media.get('subtitle'),
	97	'description': media.get('description') or media.get('teaser'),
	98	'timestamp': unified_timestamp(media.get('created')),
	99	'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
	100	'thumbnails': thumbnails,
e295618f	101	'formats': formats,
0865f397	102	}