[yt-dlp.git] / youtube_dl / extractor / onionstudios.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    float_or_none,
    mimetype2ext,
)


class OnionStudiosIE(InfoExtractor):
    """Extractor for videos hosted on onionstudios.com.

    Handles canonical video pages (``/videos/<slug>-<id>``), the player
    embed URL (``/embed?id=<id>``) and the raw metadata endpoint
    (``/video/<id>.json``).  All of them resolve to the same numeric video
    id, which is then used to fetch the JSON metadata document.
    """

    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'

    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': '719d1f8c32094b8c33902c17bcae5e34',
        'info_dict': {
            'id': '2937',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'The A.V. Club',
            'uploader_id': 'the-av-club',
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }, {
        'url': 'http://www.onionstudios.com/video/6139.json',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the first Onion Studios player URL embedded in *webpage*.

        Looks for an ``<iframe>`` or ``<bulbs-video>`` tag whose ``src``
        attribute points at an onionstudios.com embed or ``.json`` URL.
        Returns the matched URL string, or None when no embed is found.
        """
        mobj = re.search(
            r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the video's JSON metadata and build the info dict.

        The title is mandatory (a missing ``title`` key raises KeyError);
        every other field is optional and is read defensively.
        """
        video_id = self._match_id(url)

        video_data = self._download_json(
            'http://www.onionstudios.com/video/%s.json' % video_id, video_id)

        title = video_data['title']

        formats = []
        # ``or []`` also covers a "sources" key that is present but null.
        for source in video_data.get('sources') or []:
            source_url = source.get('url')
            if not source_url:
                continue
            # Prefer the declared MIME type; fall back to the URL extension.
            ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
            if ext == 'm3u8':
                # HLS manifests expand to several formats; a broken manifest
                # must not abort extraction of the remaining sources.
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                tbr = int_or_none(source.get('bitrate'))
                formats.append({
                    'format_id': ext + ('-%d' % tbr if tbr else ''),
                    'url': source_url,
                    'width': int_or_none(source.get('width')),
                    'tbr': tbr,
                    'ext': ext,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': video_data.get('poster_url'),
            'uploader': video_data.get('channel_name'),
            'uploader_id': video_data.get('channel_slug'),
            # 1000 is the *scale* argument of float_or_none, not a default
            # for dict.get: a missing duration must yield None rather than a
            # made-up 1000, and a present one is divided by 1000.
            # NOTE(review): assumes the API reports milliseconds - confirm.
            'duration': float_or_none(video_data.get('duration'), 1000),
            'tags': video_data.get('tags'),
            'formats': formats,
        }
Commit	Line	Data
f843300f S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
bbb3f730	7	from ..utils import (
	8	determine_ext,
	9	int_or_none,
6c26815d	10	float_or_none,
ab49d7a9	11	mimetype2ext,
bbb3f730	12	)
f843300f S	13
	14
	15	class OnionStudiosIE(InfoExtractor):
b0def2c2	16	_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
f843300f S	17
	18	_TESTS = [{
	19	'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
b0def2c2	20	'md5': '719d1f8c32094b8c33902c17bcae5e34',
f843300f S	21	'info_dict': {
	22	'id': '2937',
	23	'ext': 'mp4',
	24	'title': 'Hannibal charges forward, stops for a cocktail',
ec85ded8	25	'thumbnail': r're:^https?://.*\.jpg$',
f843300f	26	'uploader': 'The A.V. Club',
6c26815d	27	'uploader_id': 'the-av-club',
f843300f S	28	},
	29	}, {
	30	'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
	31	'only_matching': True,
b0def2c2 RA	32	}, {
	33	'url': 'http://www.onionstudios.com/video/6139.json',
	34	'only_matching': True,
f843300f S	35	}]
f843300f S	36
d4f58034 S	37	@staticmethod
	38	def _extract_url(webpage):
	39	mobj = re.search(
b0def2c2	40	r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
d4f58034 S	41	if mobj:
	42	return mobj.group('url')
	43
f843300f S	44	def _real_extract(self, url):
	45	video_id = self._match_id(url)
	46
6c26815d RA	47	video_data = self._download_json(
	48	'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
	49
	50	title = video_data['title']
f843300f S	51
f843300f S	52	formats = []
6c26815d RA	53	for source in video_data.get('sources', []):
	54	source_url = source.get('url')
	55	if not source_url:
	56	continue
ab49d7a9 RA	57	ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
ab49d7a9 RA	58	if ext == 'm3u8':
bbb3f730	59	formats.extend(self._extract_m3u8_formats(
6c26815d	60	source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
bbb3f730	61	else:
6c26815d	62	tbr = int_or_none(source.get('bitrate'))
f843300f	63	formats.append({
6c26815d RA	64	'format_id': ext + ('-%d' % tbr if tbr else ''),
	65	'url': source_url,
	66	'width': int_or_none(source.get('width')),
	67	'tbr': tbr,
bbb3f730	68	'ext': ext,
f843300f S	69	})
	70	self._sort_formats(formats)
	71
f843300f S	72	return {
	73	'id': video_id,
	74	'title': title,
6c26815d RA	75	'thumbnail': video_data.get('poster_url'),
	76	'uploader': video_data.get('channel_name'),
	77	'uploader_id': video_data.get('channel_slug'),
	78	'duration': float_or_none(video_data.get('duration', 1000)),
	79	'tags': video_data.get('tags'),
f843300f S	80	'formats': formats,
f843300f S	81	}