[yt-dlp.git] / youtube_dl / extractor / viki.py

import re

from ..utils import (
    unified_strdate,
)
from .subtitles import SubtitlesInfoExtractor


class VikiIE(SubtitlesInfoExtractor):
    """Information extractor for viki.com video pages.

    Metadata is scraped from the public video page; the media URL, the
    upload date and the subtitle tracks are scraped from the
    ``player5_fragment`` page belonging to the same video id.
    """
    IE_NAME = u'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        }
    }

    # Rating label as printed on the video page -> age limit reported in the
    # result. Labels missing from this table yield an age limit of None.
    _RATINGS = {
        'G': 0,
        'PG': 10,
        'PG-13': 13,
        'R': 16,
        'NC': 18,
    }

    def _real_extract(self, url):
        """Extract the info dict for the video at *url*.

        Returns a dict with the keys ``id``, ``title``, ``url``,
        ``description``, ``thumbnail``, ``age_limit``, ``uploader``,
        ``subtitles`` and ``upload_date``.  When the downloader was started
        with the ``listsubtitles`` parameter, the available subtitles are
        listed instead and ``None`` is returned.
        """
        mobj = re.match(self._VALID_URL, url)
        # Use the named group so the lookup keeps working if further groups
        # are ever added to _VALID_URL.
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader = self._html_search_regex(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
            u'uploader')
        if uploader is not None:
            uploader = uploader.strip()

        # The rating is optional: default to '' so that a page without one
        # simply ends up with no age limit.
        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        age_limit = self._RATINGS.get(rating_str)

        # The player fragment carries the media source, the creation
        # timestamp and the subtitle <track> elements.
        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(info_url, video_id)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Map language codes to subtitle URLs found in *info_webpage*.

        Every ``<track src="..."/>`` element is inspected; the language code
        is the lowercase file name preceding the ``.vtt`` extension.  Tracks
        whose URL does not have that shape are skipped.  *video_id* is part
        of the method signature but is not needed here.
        """
        res = {}
        # re.findall() needs the text to search as its second argument;
        # without it the call raises TypeError before any track is examined.
        for sturl in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res
Commit	Line	Data
382ed50e PH	1	import re
	2
	3	from ..utils import (
	4	unified_strdate,
	5	)
	6	from .subtitles import SubtitlesInfoExtractor
	7
	8
	9	class VikiIE(SubtitlesInfoExtractor):
	10	IE_NAME = u'viki'
	11
	12	_VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
	13	_TEST = {
	14	u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
	15	u'file': u'1023585v.mp4',
	16	u'md5': u'a21454021c2646f5433514177e2caa5f',
	17	u'info_dict': {
	18	u'title': u'Heirs Episode 14',
	19	u'uploader': u'SBS',
	20	u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
	21	u'upload_date': u'20131121',
	22	u'age_limit': 13,
	23	}
	24	}
	25
	26	def _real_extract(self, url):
	27	mobj = re.match(self._VALID_URL, url)
	28	video_id = mobj.group(1)
	29
	30	webpage = self._download_webpage(url, video_id)
	31	title = self._og_search_title(webpage)
	32	description = self._og_search_description(webpage)
	33	thumbnail = self._og_search_thumbnail(webpage)
	34
	35	uploader = self._html_search_regex(
	36	r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
	37	u'uploader')
	38	if uploader is not None:
	39	uploader = uploader.strip()
	40
	41	rating_str = self._html_search_regex(
	42	r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
	43	u'rating information', default='').strip()
	44	RATINGS = {
	45	'G': 0,
	46	'PG': 10,
	47	'PG-13': 13,
	48	'R': 16,
	49	'NC': 18,
	50	}
	51	age_limit = RATINGS.get(rating_str)
	52
	53	info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
	54	info_webpage = self._download_webpage(info_url, video_id)
	55	video_url = self._html_search_regex(
	56	r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
	57
	58	upload_date_str = self._html_search_regex(
	59	r'"created_at":"([^"]+)"', info_webpage, u'upload date')
	60	upload_date = (
	61	unified_strdate(upload_date_str)
	62	if upload_date_str is not None
	63	else None
	64	)
65
66	# subtitles
67	video_subtitles = self.extract_subtitles(video_id, info_webpage)
68	if self._downloader.params.get('listsubtitles', False):
69	self._list_available_subtitles(video_id, info_webpage)
70	return
71
72	return {
73	'id': video_id,
74	'title': title,
75	'url': video_url,
76	'description': description,
77	'thumbnail': thumbnail,
78	'age_limit': age_limit,
79	'uploader': uploader,
80	'subtitles': video_subtitles,
81	'upload_date': upload_date,
82	}
83
84	def _get_available_subtitles(self, video_id, info_webpage):
85	res = {}
86	for sturl in re.findall(r'<track src="([^"]+)"/>'):
87	m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
88	if not m:
89	continue
90	res[m.group('lang')] = sturl
91	return res