[yt-dlp.git] / yt_dlp / extractor / twentymin.py

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    try_get,
)


class TwentyMinutenIE(InfoExtractor):
    """Extractor for videos hosted on 20min.ch.

    Two URL shapes are recognised (see ``_VALID_URL``): the ``videotv/`` page
    carrying a ``vid=`` query parameter, and the embeddable
    ``videoplayer/videoplayer.html`` page carrying a ``videoId@`` token.
    In both cases the numeric video id is captured as the ``id`` group.
    """

    IE_NAME = '20min'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?20min\.ch/
                        (?:
                            videotv/*\?.*?\bvid=|
                            videoplayer/videoplayer\.html\?.*?\bvideoId@
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
        'md5': 'e7264320db31eed8c38364150c12496e',
        'info_dict': {
            'id': '469148',
            'ext': 'mp4',
            'title': '85 000 Franken für 15 perfekte Minuten',
            'thumbnail': r're:https?://.*\.jpg$',
        },
    }, {
        'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
        'info_dict': {
            'id': '523629',
            'ext': 'mp4',
            'title': 'So kommen Sie bei Eis und Schnee sicher an',
            'description': 'md5:117c212f64b25e3d95747e5276863f7d',
            'thumbnail': r're:https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every 20min.ch video-player iframe in *webpage*.

        An iframe is picked up when its quoted ``src`` attribute points at
        ``20min.ch/videoplayer/videoplayer.html`` (scheme and ``www.`` are
        optional) and contains a ``videoId@<digits>`` token.

        Returns a list of URL strings; the list is empty when nothing matches.
        """
        # The dot in ``videoplayer\.html`` is escaped so that only a literal
        # '.' matches, keeping this pattern in line with ``_VALID_URL``;
        # otherwise a URL could be returned here that ``_VALID_URL`` rejects.
        return [m.group('url') for m in re.finditer(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer\.html\?.*?\bvideoId@\d+.*?)\1',
            webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The video id is taken from the URL, the metadata from the ``content``
        object of the JSON served at ``api.20min.ch/video/<id>/show``.
        ``title`` is mandatory (a missing key raises ``KeyError``); the
        remaining metadata fields are optional.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://api.20min.ch/video/%s/show' % video_id,
            video_id)['content']

        title = video['title']

        # Media URLs are not read from the API response: they are built from
        # a fixed URL pattern in which the 'hd' variant appends an 'h' to the
        # video id.  The enumerate index doubles as the 'quality' value
        # (sd=0, hd=1).
        formats = [{
            'format_id': format_id,
            'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
            'quality': quality,
        } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
        self._sort_formats(formats)

        description = video.get('lead')
        thumbnail = video.get('thumbnail')

        def extract_count(kind):
            # Reads video['communityobject']['thumbs_<kind>'] and passes it
            # through int_or_none.  The try_get wrapper presumably yields
            # None when the nested keys are absent -- confirm against
            # utils.try_get.
            return try_get(
                video,
                lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))

        like_count = extract_count('up')
        dislike_count = extract_count('down')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }
Commit	Line	Data
133b1886 S	1	import re
	2
	3	from .common import InfoExtractor
538b17a0 S	4	from ..utils import (
	5	int_or_none,
	6	try_get,
	7	)
133b1886 S	8
	9
	10	class TwentyMinutenIE(InfoExtractor):
	11	IE_NAME = '20min'
538b17a0 S	12	_VALID_URL = r'''(?x)
	13	https?://
	14	(?:www\.)?20min\.ch/
	15	(?:
	16	videotv/*\?.*?\bvid=|
	17	videoplayer/videoplayer\.html\?.*?\bvideoId@
	18	)
	19	(?P<id>\d+)
	20	'''
133b1886	21	_TESTS = [{
133b1886	22	'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
4e445985	23	'md5': 'e7264320db31eed8c38364150c12496e',
133b1886 S	24	'info_dict': {
133b1886 S	25	'id': '469148',
4e445985	26	'ext': 'mp4',
133b1886	27	'title': '85 000 Franken für 15 perfekte Minuten',
538b17a0	28	'thumbnail': r're:https?://.*\.jpg$',
3cc8649c	29	},
4e445985	30	}, {
538b17a0	31	'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
4e445985 AS	32	'info_dict': {
4e445985 AS	33	'id': '523629',
4e445985 AS	34	'ext': 'mp4',
4e445985 AS	35	'title': 'So kommen Sie bei Eis und Schnee sicher an',
538b17a0 S	36	'description': 'md5:117c212f64b25e3d95747e5276863f7d',
	37	'thumbnail': r're:https?://.*\.jpg$',
	38	},
	39	'params': {
	40	'skip_download': True,
3cc8649c	41	},
133b1886 S	42	}, {
	43	'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
	44	'only_matching': True,
133b1886 S	45	}]
133b1886 S	46
538b17a0 S	47	@staticmethod
	48	def _extract_urls(webpage):
	49	return [m.group('url') for m in re.finditer(
4bf22f7a	50	r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
538b17a0 S	51	webpage)]
538b17a0 S	52
133b1886	53	def _real_extract(self, url):
538b17a0	54	video_id = self._match_id(url)
133b1886	55
538b17a0 S	56	video = self._download_json(
	57	'http://api.20min.ch/video/%s/show' % video_id,
	58	video_id)['content']
133b1886	59
538b17a0	60	title = video['title']
3cc8649c	61
538b17a0 S	62	formats = [{
	63	'format_id': format_id,
	64	'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
	65	'quality': quality,
	66	} for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
	67	self._sort_formats(formats)
133b1886	68
538b17a0 S	69	description = video.get('lead')
538b17a0 S	70	thumbnail = video.get('thumbnail')
133b1886	71
538b17a0 S	72	def extract_count(kind):
	73	return try_get(
	74	video,
	75	lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))
133b1886	76
538b17a0 S	77	like_count = extract_count('up')
538b17a0 S	78	dislike_count = extract_count('down')
4e445985	79
133b1886 S	80	return {
133b1886 S	81	'id': video_id,
133b1886 S	82	'title': title,
	83	'description': description,
	84	'thumbnail': thumbnail,
538b17a0 S	85	'like_count': like_count,
538b17a0 S	86	'dislike_count': dislike_count,
4e445985	87	'formats': formats,
133b1886	88	}