[yt-dlp.git] / youtube_dlc / extractor / rtl2.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..aes import aes_cbc_decrypt
from ..compat import (
    compat_b64decode,
    compat_ord,
    compat_str,
)
from ..utils import (
    bytes_to_intlist,
    ExtractorError,
    intlist_to_bytes,
    int_or_none,
    strip_or_none,
)


class RTL2IE(InfoExtractor):
    """Extractor for rtl2.de "sendung" pages (both ``video/`` and ``folge/`` URLs).

    The numeric ``vico_id``/``vivi_id`` pair is read from the URL when it is
    present there, otherwise it is scraped from the downloaded page.  The pair
    is then sent to the ``video.php`` player API, whose JSON answer provides
    the metadata and the RTMP and/or HLS stream locations.
    """
    IE_NAME = 'rtl2'
    _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
        'info_dict': {
            'id': 'folge-203-0',
            'ext': 'f4v',
            'title': 'GRIP sucht den Sommerkönig',
            'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f'
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
    }, {
        'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
        'info_dict': {
            'id': 'anna-erwischt-alex',
            'ext': 'mp4',
            'title': 'Anna erwischt Alex!',
            'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
    }]

    def _real_extract(self, url):
        """Resolve the video ids for *url*, query the player API and build the info dict."""
        url_match = re.match(self._VALID_URL, url)
        display_id = url_match.group('id')
        vico_id = url_match.group('vico_id')
        vivi_id = url_match.group('vivi_id')

        if not vico_id:
            # The "folge/" form of the URL has no numeric ids in it, so they
            # have to be dug out of the page markup instead.
            webpage = self._download_webpage(url, display_id)

            attr_match = re.search(
                r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
                webpage)
            if attr_match is None:
                # No data-* attributes found: fall back to "vico_id: N" /
                # "vivi_id: N" assignments elsewhere in the page.
                vico_id = self._html_search_regex(
                    r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
                vivi_id = self._html_search_regex(
                    r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
            else:
                vico_id, vivi_id = attr_match.group('vico_id', 'vivi_id')

        api_query = {
            'vico_id': vico_id,
            'vivi_id': vivi_id,
        }
        api_response = self._download_json(
            'https://service.rtl2.de/api-player-vipo/video.php',
            display_id, query=api_query)
        video_info = api_response['video']
        title = video_info['titel']

        formats = []

        rtmp_url = video_info.get('streamurl')
        if rtmp_url:
            # Drop every backslash from the URL (presumably leftover escaping
            # in the API response -- confirm).
            rtmp_url = rtmp_url.replace('\\', '')
            ondemand_path = self._html_search_regex(
                r'/ondemand/(.+)', rtmp_url, 'stream URL')

            formats.append({
                'format_id': 'rtmp',
                'url': rtmp_url,
                # Play path is whatever follows "/ondemand/", prefixed with "mp4:".
                'play_path': 'mp4:' + ondemand_path,
                'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
                'page_url': url,
                'flash_version': 'LNX 11,2,202,429',
                'rtmp_conn': [
                    'S:connect',
                    'O:1',
                    'NS:pageUrl:' + url,
                    'NB:fpad:0',
                    'NN:videoFunction:1',
                    'O:0',
                ],
                'no_resume': True,
                'preference': 1,
            })

        hls_url = video_info.get('streamurl_hls')
        if hls_url:
            formats.extend(self._extract_akamai_formats(hls_url, display_id))

        self._sort_formats(formats)

        duration = int_or_none(video_info.get('duration'))
        return {
            'id': display_id,
            'title': title,
            'thumbnail': video_info.get('image'),
            'description': video_info.get('beschreibung'),
            'duration': duration,
            'formats': formats,
        }


class RTL2YouBaseIE(InfoExtractor):
    """Common base of the you.rtl2.de extractors; holds the shared backend URL."""
    # Root of the JSON backend. Subclasses in this file append
    # 'stream/video/<id>', 'video/<id>' or 'videos' to it.
    _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'


class RTL2YouIE(RTL2YouBaseIE):
    """Extractor for single videos on you.rtl2.de.

    Handles both ``/video/<series>/<id>/...`` page URLs and
    ``/youplayer/index.html?...vid=<id>`` player URLs.  The backend returns
    the HLS manifest URL AES-CBC encrypted; it is decrypted with ``_AES_KEY``
    before the formats are extracted.
    """
    IE_NAME = 'rtl2:you'
    # The scheme must be 'https?'.  The previous 'http?' made the final "p"
    # optional (matching "htt://" or "http://") and rejected https URLs.
    _VALID_URL = r'https?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
        'info_dict': {
            'id': '15740',
            'ext': 'mp4',
            'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!',
            'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
            'age_limit': 12,
        },
    }, {
        'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
        'only_matching': True,
    }, {
        'url': 'https://you.rtl2.de/youplayer/index.html?vid=15712',
        'only_matching': True,
    }]
    _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
    _GEO_COUNTRIES = ['DE']

    def _real_extract(self, url):
        """Download stream and metadata JSON for *url* and return the info dict.

        Raises ExtractorError (expected) when the decrypted stream URL carries
        the backend's "not found" marker.
        """
        video_id = self._match_id(url)

        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)

        # 'streamUrl' is base64 of "<base64 ciphertext>:<base64 IV>".
        data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
        stream_url = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(compat_b64decode(data)),
            bytes_to_intlist(self._AES_KEY),
            bytes_to_intlist(compat_b64decode(iv))
        ))
        if b'rtl2_you_video_not_found' in stream_url:
            raise ExtractorError('video not found', expected=True)

        # The value of the final byte is the number of trailing padding bytes
        # to strip (PKCS#7-style) before the plaintext is decoded.
        padding_length = compat_ord(stream_url[-1])
        formats = self._extract_m3u8_formats(
            stream_url[:-padding_length].decode(),
            video_id, 'mp4', 'm3u8_native')
        self._sort_formats(formats)

        video_data = self._download_json(
            self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)

        series = video_data.get('formatTitle')
        # The episode name falls back to the series name when no title is given;
        # the full title is "<series> - <episode>" when the two differ.
        title = episode = video_data.get('title') or series
        if series and series != title:
            title = '%s - %s' % (series, title)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': strip_or_none(video_data.get('description')),
            'thumbnail': video_data.get('image'),
            # NOTE(review): the 1000 looks like a ms -> s scale factor -- confirm.
            'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
            'series': series,
            'episode': episode,
            'age_limit': int_or_none(video_data.get('minimumAge')),
        }


class RTL2YouSeriesIE(RTL2YouBaseIE):
    """Playlist extractor for a you.rtl2.de series page (``/videos/<id>``).

    Requests the backend's video list for the series and returns one
    ``RTL2You`` URL result per listed video.
    """
    IE_NAME = 'rtl2:you:series'
    # The scheme must be 'https?'.  The previous 'http?' made the final "p"
    # optional and rejected https URLs.
    _VALID_URL = r'https?://you\.rtl2\.de/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://you.rtl2.de/videos/115/dragon-ball',
        'info_dict': {
            'id': '115',
        },
        'playlist_mincount': 5,
    }

    def _real_extract(self, url):
        """Return a playlist result holding every video listed for the series in *url*."""
        series_id = self._match_id(url)
        stream_data = self._download_json(
            self._BACKWERK_BASE_URL + 'videos',
            series_id, query={
                'formatId': series_id,
                # Very large limit so that the whole list arrives in one request.
                'limit': 1000000000,
            })

        entries = []
        # 'or []' also covers a present-but-null 'videos' value.
        for video in stream_data.get('videos') or []:
            # Test the raw value before stringifying it: compat_str(None)
            # would be the truthy string 'None', so the old
            # "if not video_id" check could never skip a missing id.
            raw_video_id = video.get('videoId')
            if raw_video_id is None or raw_video_id == '':
                continue
            video_id = compat_str(raw_video_id)
            entries.append(self.url_result(
                'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
                'RTL2You', video_id))
        return self.playlist_result(entries, series_id)
Commit	Line	Data
dcdb292f	1	# coding: utf-8
7906d199 DD	2	from __future__ import unicode_literals
7906d199 DD	3
5e1a5ac8	4	import re
9c5b5f21	5
7906d199	6	from .common import InfoExtractor
b68e00b0 RA	7	from ..aes import aes_cbc_decrypt
b68e00b0 RA	8	from ..compat import (
cf282071	9	compat_b64decode,
b68e00b0 RA	10	compat_ord,
	11	compat_str,
	12	)
	13	from ..utils import (
	14	bytes_to_intlist,
	15	ExtractorError,
	16	intlist_to_bytes,
	17	int_or_none,
	18	strip_or_none,
	19	)
7906d199 DD	20
	21
	22	class RTL2IE(InfoExtractor):
b68e00b0	23	IE_NAME = 'rtl2'
4f7db468	24	_VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-\|folge/)(?P<id>[^/?#]+)'
7906d199	25	_TESTS = [{
3dee7826	26	'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
3dee7826 PH	27	'info_dict': {
	28	'id': 'folge-203-0',
	29	'ext': 'f4v',
	30	'title': 'GRIP sucht den Sommerkönig',
9c5b5f21	31	'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f'
7906d199	32	},
4932a817 YCH	33	'params': {
	34	# rtmp download
	35	'skip_download': True,
	36	},
4f7db468	37	'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
3dee7826 PH	38	}, {
3dee7826 PH	39	'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
3dee7826	40	'info_dict': {
4f7db468	41	'id': 'anna-erwischt-alex',
3dee7826 PH	42	'ext': 'mp4',
3dee7826 PH	43	'title': 'Anna erwischt Alex!',
9c5b5f21	44	'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
7906d199	45	},
5e1a5ac8 YCH	46	'params': {
	47	# rtmp download
	48	'skip_download': True,
	49	},
4f7db468	50	'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
3dee7826	51	}]
7906d199 DD	52
7906d199 DD	53	def _real_extract(self, url):
4f7db468 RA	54	vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups()
	55	if not vico_id:
	56	webpage = self._download_webpage(url, display_id)
	57
	58	mobj = re.search(
	59	r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
	60	webpage)
	61	if mobj:
	62	vico_id = mobj.group('vico_id')
	63	vivi_id = mobj.group('vivi_id')
	64	else:
	65	vico_id = self._html_search_regex(
	66	r'vico_id\s:\s([0-9]+)', webpage, 'vico_id')
	67	vivi_id = self._html_search_regex(
	68	r'vivi_id\s:\s([0-9]+)', webpage, 'vivi_id')
7906d199	69
9c5b5f21	70	info = self._download_json(
4f7db468 RA	71	'https://service.rtl2.de/api-player-vipo/video.php',
4f7db468 RA	72	display_id, query={
9c5b5f21 RA	73	'vico_id': vico_id,
	74	'vivi_id': vivi_id,
	75	})
3dee7826 PH	76	video_info = info['video']
3dee7826 PH	77	title = video_info['titel']
7906d199	78
9c5b5f21 RA	79	formats = []
	80
	81	rtmp_url = video_info.get('streamurl')
	82	if rtmp_url:
	83	rtmp_url = rtmp_url.replace('\\', '')
	84	stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL')
	85	rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']
	86
	87	formats.append({
	88	'format_id': 'rtmp',
	89	'url': rtmp_url,
	90	'play_path': stream_url,
977a7821	91	'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
9c5b5f21 RA	92	'page_url': url,
	93	'flash_version': 'LNX 11,2,202,429',
	94	'rtmp_conn': rtmp_conn,
	95	'no_resume': True,
	96	'preference': 1,
	97	})
	98
	99	m3u8_url = video_info.get('streamurl_hls')
	100	if m3u8_url:
4f7db468	101	formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
7906d199	102
3dee7826	103	self._sort_formats(formats)
7906d199 DD	104
7906d199 DD	105	return {
4f7db468	106	'id': display_id,
7906d199	107	'title': title,
9c5b5f21 RA	108	'thumbnail': video_info.get('image'),
	109	'description': video_info.get('beschreibung'),
	110	'duration': int_or_none(video_info.get('duration')),
7906d199 DD	111	'formats': formats,
7906d199 DD	112	}
b68e00b0 RA	113
	114
	115	class RTL2YouBaseIE(InfoExtractor):
	116	_BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/'
	117
	118
	119	class RTL2YouIE(RTL2YouBaseIE):
	120	IE_NAME = 'rtl2:you'
	121	_VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/\|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)'
	122	_TESTS = [{
	123	'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du',
	124	'info_dict': {
	125	'id': '15740',
	126	'ext': 'mp4',
	127	'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!',
	128	'description': 'md5:ddaa95c61b372b12b66e115b2772fe01',
	129	'age_limit': 12,
	130	},
	131	}, {
	132	'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712',
	133	'only_matching': True,
	134	}]
	135	_AES_KEY = b'\xe9W\xe4.<\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
	136	_GEO_COUNTRIES = ['DE']
	137
	138	def _real_extract(self, url):
	139	video_id = self._match_id(url)
	140
	141	stream_data = self._download_json(
	142	self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id)
	143
cf282071	144	data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':')
b68e00b0	145	stream_url = intlist_to_bytes(aes_cbc_decrypt(
cf282071	146	bytes_to_intlist(compat_b64decode(data)),
b68e00b0	147	bytes_to_intlist(self._AES_KEY),
cf282071	148	bytes_to_intlist(compat_b64decode(iv))
b68e00b0 RA	149	))
	150	if b'rtl2_you_video_not_found' in stream_url:
	151	raise ExtractorError('video not found', expected=True)
	152
	153	formats = self._extract_m3u8_formats(
	154	stream_url[:-compat_ord(stream_url[-1])].decode(),
	155	video_id, 'mp4', 'm3u8_native')
	156	self._sort_formats(formats)
	157
	158	video_data = self._download_json(
	159	self._BACKWERK_BASE_URL + 'video/' + video_id, video_id)
	160
	161	series = video_data.get('formatTitle')
	162	title = episode = video_data.get('title') or series
	163	if series and series != title:
	164	title = '%s - %s' % (series, title)
	165
	166	return {
	167	'id': video_id,
	168	'title': title,
	169	'formats': formats,
	170	'description': strip_or_none(video_data.get('description')),
	171	'thumbnail': video_data.get('image'),
	172	'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000),
	173	'series': series,
	174	'episode': episode,
	175	'age_limit': int_or_none(video_data.get('minimumAge')),
	176	}
	177
	178
	179	class RTL2YouSeriesIE(RTL2YouBaseIE):
	180	IE_NAME = 'rtl2:you:series'
	181	_VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)'
	182	_TEST = {
	183	'url': 'http://you.rtl2.de/videos/115/dragon-ball',
	184	'info_dict': {
	185	'id': '115',
	186	},
	187	'playlist_mincount': 5,
	188	}
	189
	190	def _real_extract(self, url):
	191	series_id = self._match_id(url)
	192	stream_data = self._download_json(
	193	self._BACKWERK_BASE_URL + 'videos',
	194	series_id, query={
	195	'formatId': series_id,
	196	'limit': 1000000000,
	197	})
	198
	199	entries = []
	200	for video in stream_data.get('videos', []):
	201	video_id = compat_str(video['videoId'])
	202	if not video_id:
	203	continue
	204	entries.append(self.url_result(
	205	'http://you.rtl2.de/video/%s/%s' % (series_id, video_id),
	206	'RTL2You', video_id))
	207	return self.playlist_result(entries, series_id)