[yt-dlp.git] / youtube_dl / extractor / viddler.py

import json
import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
)


class ViddlerIE(InfoExtractor):
    """Extractor for videos hosted on viddler.com.

    Accepts ``/v/<id>``, ``/embed/<id>`` and ``/player/<id>`` URLs and reads
    all metadata from the corresponding ``/embed/<id>`` page.
    """

    # The dot in "viddler\.com" is escaped on purpose: the matched ``domain``
    # group is reused to build the URL that gets downloaded, so the pattern
    # must not accept an arbitrary character in that position.
    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
    _TEST = {
        u"url": u"http://www.viddler.com/v/43903784",
        u'file': u'43903784.mp4',
        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
        u'info_dict': {
            u"title": u"Video Made Easy",
            u"uploader": u"viddler",
            u"duration": 100.89,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The embed page is downloaded and scraped for a ``sources`` object
        (media URLs), plus ``title``, ``authorName``, ``duration`` and
        ``thumbnail`` fields. Only the sources and the title are mandatory;
        the remaining fields are looked up with ``fatal=False``.

        Returns a dict with the keys ``_type``, ``id``, ``title``,
        ``thumbnail``, ``uploader``, ``duration`` and ``formats``, merged with
        the ``url``, ``format`` and ``ext`` of the last entry in ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Always scrape the embed page, whichever URL flavour was given.
        embed_url = mobj.group('domain') + u'/embed/' + video_id
        webpage = self._download_webpage(embed_url, video_id)

        video_sources_code = self._search_regex(
            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
        # The captured object is not fed to json as-is: single quotes are
        # swapped for double quotes first (presumably because the page uses a
        # single-quoted JavaScript literal -- confirm against a live page).
        video_sources = json.loads(video_sources_code.replace("'", '"'))

        # Keys of the sources object are used as media URLs, values as the
        # format label.
        formats = [{
            'url': video_url,
            'format': format_id,
        } for video_url, format_id in video_sources.items()]

        title = self._html_search_regex(
            r"title\s*:\s*'([^']*)'", webpage, u'title')
        uploader = self._html_search_regex(
            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
        duration_s = self._html_search_regex(
            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
        # The pattern above can capture text that is not a valid number
        # (e.g. "." or "1.2.3"). Duration is optional, so an unparseable
        # value is treated as missing instead of aborting the extraction.
        try:
            duration = float(duration_s) if duration_s else None
        except ValueError:
            duration = None
        thumbnail = self._html_search_regex(
            r"thumbnail\s*:\s*'([^']*)'",
            webpage, u'thumbnail', fatal=False)

        info = {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
        }

        # TODO: Remove when #980 has been merged
        # Until then the last format is promoted to the top level of the
        # result. NOTE(review): an empty ``sources`` object would make the
        # [-1] lookups below raise IndexError.
        info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
        info.update(info['formats'][-1])

        return info
Commit	Line	Data
41e8bca4 PH	1	import json
	2	import re
	3
	4	from .common import InfoExtractor
	5	from ..utils import (
	6	determine_ext,
	7	)
	8
	9
	10	class ViddlerIE(InfoExtractor):
12ebdd15	11	_VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler.com)/(?:v\|embed\|player)/(?P<id>[a-z0-9]+)'
41e8bca4 PH	12	_TEST = {
	13	u"url": u"http://www.viddler.com/v/43903784",
	14	u'file': u'43903784.mp4',
	15	u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
	16	u'info_dict': {
	17	u"title": u"Video Made Easy",
	18	u"uploader": u"viddler",
	19	u"duration": 100.89,
	20	}
	21	}
	22
	23	def _real_extract(self, url):
	24	mobj = re.match(self._VALID_URL, url)
	25	video_id = mobj.group('id')
	26
	27	embed_url = mobj.group('domain') + u'/embed/' + video_id
	28	webpage = self._download_webpage(embed_url, video_id)
	29
	30	video_sources_code = self._search_regex(
	31	r"(?ms)sources\s:\s(\{.*?\})", webpage, u'video URLs')
	32	video_sources = json.loads(video_sources_code.replace("'", '"'))
	33
	34	formats = [{
	35	'url': video_url,
	36	'format': format_id,
	37	} for video_url, format_id in video_sources.items()]
	38
	39	title = self._html_search_regex(
	40	r"title\s:\s'([^']*)'", webpage, u'title')
	41	uploader = self._html_search_regex(
	42	r"authorName\s:\s'([^']*)'", webpage, u'uploader', fatal=False)
	43	duration_s = self._html_search_regex(
	44	r"duration\s:\s([0-9.]*)", webpage, u'duration', fatal=False)
	45	duration = float(duration_s) if duration_s else None
	46	thumbnail = self._html_search_regex(
	47	r"thumbnail\s:\s'([^']*)'",
	48	webpage, u'thumbnail', fatal=False)
	49
	50	info = {
	51	'_type': 'video',
	52	'id': video_id,
	53	'title': title,
	54	'thumbnail': thumbnail,
	55	'uploader': uploader,
	56	'duration': duration,
	57	'formats': formats,
	58	}
	59
	60	# TODO: Remove when #980 has been merged
	61	info['formats'][-1]['ext'] = determine_ext(info['formats'][-1]['url'])
	62	info.update(info['formats'][-1])
	63
	64	return info