]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twentymin.py
[generic] Extract subtitles from video.js (#3156)
[yt-dlp.git] / yt_dlp / extractor / twentymin.py
CommitLineData
133b1886
S
1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5
6from .common import InfoExtractor
538b17a0
S
7from ..utils import (
8 int_or_none,
9 try_get,
10)
133b1886
S
11
12
class TwentyMinutenIE(InfoExtractor):
    """Extractor for videos hosted on 20min.ch.

    Handles both the ``videotv`` pages (video id in the ``vid`` query
    parameter) and the embeddable ``videoplayer.html`` player (video id
    carried as ``videoId@<digits>`` inside the query string).
    """

    IE_NAME = '20min'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?20min\.ch/
                        (?:
                            videotv/*\?.*?\bvid=|
                            videoplayer/videoplayer\.html\?.*?\bvideoId@
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
        'md5': 'e7264320db31eed8c38364150c12496e',
        'info_dict': {
            'id': '469148',
            'ext': 'mp4',
            'title': '85 000 Franken für 15 perfekte Minuten',
            'thumbnail': r're:https?://.*\.jpg$',
        },
    }, {
        'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',
        'info_dict': {
            'id': '523629',
            'ext': 'mp4',
            'title': 'So kommen Sie bei Eis und Schnee sicher an',
            'description': 'md5:117c212f64b25e3d95747e5276863f7d',
            'thumbnail': r're:https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the URLs of all 20min player iframes found in *webpage*.

        Each returned value is the raw ``src`` attribute of an ``<iframe>``
        whose target is the 20min ``videoplayer.html`` page and whose query
        string contains a ``videoId@<digits>`` token. The scheme and the
        ``www.`` prefix are optional in the match, so protocol-relative and
        scheme-less URLs are returned as written in the page.
        """
        # The dot in "videoplayer\.html" is escaped so it only matches a
        # literal '.', in agreement with _VALID_URL above.
        return [m.group('url') for m in re.finditer(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer\.html\?.*?\bvideoId@\d+.*?)\1',
            webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The video id is taken from the URL, metadata is read from the
        ``content`` object of the JSON returned by the 20min video API, and
        the format URLs are derived from the id alone.
        """
        video_id = self._match_id(url)

        # Both the 'content' object and its 'title' are accessed with plain
        # indexing, so a response lacking either raises instead of yielding
        # a partially filled result.
        video = self._download_json(
            'http://api.20min.ch/video/%s/show' % video_id,
            video_id)['content']

        title = video['title']

        # Two fixed renditions are offered per video: the plain file name is
        # labelled 'sd' and the one with an 'h' suffix 'hd'. The enumerate
        # index doubles as the quality rank, so 'hd' ranks above 'sd'.
        formats = [{
            'format_id': format_id,
            'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p),
            'quality': quality,
        } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])]
        self._sort_formats(formats)

        description = video.get('lead')
        thumbnail = video.get('thumbnail')

        def extract_count(kind):
            # Reads communityobject.thumbs_<kind> through try_get so that an
            # absent 'communityobject' or counter does not abort extraction.
            return try_get(
                video,
                lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind]))

        like_count = extract_count('up')
        dislike_count = extract_count('down')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'like_count': like_count,
            'dislike_count': dislike_count,
            'formats': formats,
        }