]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/viki.py
[viki] Add extractor (fixes #1813)
[yt-dlp.git] / youtube_dl / extractor / viki.py
CommitLineData
382ed50e
PH
1import re
2
3from ..utils import (
4 unified_strdate,
5)
6from .subtitles import SubtitlesInfoExtractor
7
8
class VikiIE(SubtitlesInfoExtractor):
    """Information extractor for viki.com video pages.

    Metadata (title, description, thumbnail, uploader, rating) is scraped
    from the public video page; the media URL, creation date and subtitle
    tracks are scraped from the ``player5_fragment`` page for the same id.
    """

    IE_NAME = u'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at *url*.

        Returns a dict with the keys ``id``, ``title``, ``url``,
        ``description``, ``thumbnail``, ``age_limit``, ``uploader``,
        ``subtitles`` and ``upload_date``.  When the ``listsubtitles``
        downloader parameter is set, the available subtitles are listed
        and ``None`` is returned instead.
        """
        mobj = re.match(self._VALID_URL, url)
        # Use the named group declared in _VALID_URL rather than its index.
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader = self._html_search_regex(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
            u'uploader')
        # NOTE(review): guard kept from the original; whether the helper can
        # actually return None here depends on its implementation -- confirm.
        if uploader is not None:
            uploader = uploader.strip()

        # A missing rating yields '' (the supplied default), which is not a
        # key of RATINGS, so age_limit ends up as None in that case.
        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        RATINGS = {
            'G': 0,
            'PG': 10,
            'PG-13': 13,
            'R': 16,
            'NC': 18,
        }
        age_limit = RATINGS.get(rating_str)

        # The player fragment carries the <source> tag, the "created_at"
        # field and the subtitle <track> tags.
        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(info_url, video_id)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Map language codes to subtitle URLs found in *info_webpage*.

        Every ``<track src="..."/>`` tag whose URL contains
        ``/<lang>.vtt`` (lowercase letters only) contributes one entry;
        tracks whose URL does not match that shape are skipped.
        """
        res = {}
        # The page to scan must be passed explicitly: re.findall() requires
        # both a pattern and the string to search.
        for sturl in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res