[yt-dlp.git] / youtube_dl / extractor / wistia.py

from __future__ import unicode_literals

import json
import re

from .common import InfoExtractor


class WistiaIE(InfoExtractor):
    """Extractor for Wistia iframe embed pages (URLs matched by _VALID_URL)."""

    _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'

    _TEST = {
        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
        'md5': 'cafeb56ec0c53c18c97405eecb3133df',
        'info_dict': {
            'id': 'sh7fpupwlt',
            'ext': 'mov',
            'title': 'Being Resourceful',
            'duration': 117,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the embed page at *url*.

        The embed page is downloaded, the JSON passed as the first argument
        of ``Wistia.iframeInit(...)`` is parsed, and its ``assets`` mapping
        is split into thumbnails (``still``) and formats (everything else
        except ``preview``).

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``thumbnails`` and ``duration``.

        Raises ``KeyError`` if the parsed data has no ``assets`` or ``name``
        entry; individual assets, however, may omit any descriptive field.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        # Capture the first argument of the ``Wistia.iframeInit(<json>, {});``
        # call embedded in the page.
        # NOTE(review): _html_search_regex presumably post-processes the match
        # as HTML; confirm that cannot alter the JSON payload.
        data_json = self._html_search_regex(
            r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')

        data = json.loads(data_json)

        formats = []
        thumbnails = []
        for atype, a in data['assets'].items():
            # Preview assets are neither offered as formats nor as thumbnails.
            if atype == 'preview':
                continue

            # An asset without a URL cannot be downloaded or displayed, so
            # skip it instead of aborting the whole extraction.
            asset_url = a.get('url')
            if not asset_url:
                continue

            # Dimensions are descriptive only; tolerate their absence.
            width = a.get('width')
            height = a.get('height')

            if atype == 'still':
                thumbnail = {'url': asset_url}
                # Only advertise a resolution when both dimensions are known.
                if width is not None and height is not None:
                    thumbnail['resolution'] = '%dx%d' % (width, height)
                thumbnails.append(thumbnail)
                continue

            formats.append({
                'format_id': atype,
                'url': asset_url,
                'width': width,
                'height': height,
                'filesize': a.get('size'),
                'ext': a.get('ext'),
                # Mark the original upload with an explicit preference; all
                # other assets carry none.
                'preference': 1 if atype == 'original' else None,
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': data['name'],
            'formats': formats,
            'thumbnails': thumbnails,
            'duration': data.get('duration'),
        }
Commit	Line	Data
e423e0ba S	1	from __future__ import unicode_literals
e423e0ba S	2
ef4fd848 PH	3	import json
	4	import re
	5
	6	from .common import InfoExtractor
	7
	8
	9	class WistiaIE(InfoExtractor):
e423e0ba	10	_VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)'
ef4fd848 PH	11
ef4fd848 PH	12	_TEST = {
e423e0ba S	13	'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
	14	'md5': 'cafeb56ec0c53c18c97405eecb3133df',
	15	'info_dict': {
	16	'id': 'sh7fpupwlt',
	17	'ext': 'mov',
	18	'title': 'Being Resourceful',
	19	'duration': 117,
ef4fd848 PH	20	},
	21	}
	22
	23	def _real_extract(self, url):
	24	mobj = re.match(self._VALID_URL, url)
	25	video_id = mobj.group('id')
	26
	27	webpage = self._download_webpage(url, video_id)
	28	data_json = self._html_search_regex(
e423e0ba	29	r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data')
ef4fd848 PH	30
	31	data = json.loads(data_json)
	32
	33	formats = []
	34	thumbnails = []
	35	for atype, a in data['assets'].items():
	36	if atype == 'still':
	37	thumbnails.append({
	38	'url': a['url'],
	39	'resolution': '%dx%d' % (a['width'], a['height']),
	40	})
	41	continue
	42	if atype == 'preview':
	43	continue
	44	formats.append({
	45	'format_id': atype,
	46	'url': a['url'],
	47	'width': a['width'],
	48	'height': a['height'],
	49	'filesize': a['size'],
	50	'ext': a['ext'],
08d13955	51	'preference': 1 if atype == 'original' else None,
ef4fd848	52	})
539179f4 PH	53
539179f4 PH	54	self._sort_formats(formats)
ef4fd848 PH	55
	56	return {
	57	'id': video_id,
	58	'title': data['name'],
	59	'formats': formats,
	60	'thumbnails': thumbnails,
e423e0ba	61	'duration': data.get('duration'),
ef4fd848	62	}