[yt-dlp.git] / youtube_dl / extractor / onionstudios.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    int_or_none,
    float_or_none,
    mimetype2ext,
)


class OnionStudiosIE(InfoExtractor):
    """Information extractor for videos hosted on onionstudios.com.

    Matches both video pages (``/videos/<slug>-<id>``) and embed URLs
    (``/embed?...id=<id>``); the numeric id is captured as ``id``.
    """

    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'

    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
        'info_dict': {
            'id': '2937',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'The A.V. Club',
            'uploader_id': 'the-av-club',
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the ``src`` of the first onionstudios.com embed iframe.

        :param webpage: HTML text to search.
        :returns: the embed URL (possibly protocol-relative, i.e. starting
            with ``//``), or ``None`` when no matching iframe is found.
        """
        mobj = re.search(
            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
        if mobj:
            return mobj.group('url')
        return None

    def _real_extract(self, url):
        """Download the video's JSON description and build the info dict.

        :param url: a URL matching ``_VALID_URL``.
        :returns: a dict with ``id``, ``title``, ``formats`` and optional
            metadata (thumbnail, uploader, duration, tags).
        :raises KeyError: if the JSON has no ``title`` entry.
        """
        video_id = self._match_id(url)

        video_data = self._download_json(
            'http://www.onionstudios.com/video/%s.json' % video_id, video_id)

        # The title is mandatory: fail loudly if it is absent.
        title = video_data['title']

        formats = []
        # ``or []`` also covers an explicit JSON null, which a plain
        # ``.get('sources', [])`` default would let through as None.
        for source in video_data.get('sources') or []:
            source_url = source.get('url')
            if not source_url:
                continue
            # Prefer the declared MIME type; fall back to the URL's extension.
            ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
            if ext == 'm3u8':
                # HLS manifest: expand into its variants; a broken manifest
                # is non-fatal so other sources can still be used.
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
            else:
                tbr = int_or_none(source.get('bitrate'))
                formats.append({
                    # e.g. "mp4-1200", or just "mp4" when the bitrate is unknown.
                    'format_id': ext + ('-%d' % tbr if tbr else ''),
                    'url': source_url,
                    'width': int_or_none(source.get('width')),
                    'tbr': tbr,
                    'ext': ext,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': video_data.get('poster_url'),
            'uploader': video_data.get('channel_name'),
            'uploader_id': video_data.get('channel_slug'),
            # 1000 is float_or_none's scale divisor, not a dict.get() default:
            # a missing duration must yield None rather than a made-up value.
            # NOTE(review): presumably the API reports milliseconds - confirm.
            'duration': float_or_none(video_data.get('duration'), 1000),
            'tags': video_data.get('tags'),
            'formats': formats,
        }
Commit	Line	Data
f843300f S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
bbb3f730	7	from ..utils import (
	8	determine_ext,
	9	int_or_none,
6c26815d	10	float_or_none,
ab49d7a9	11	mimetype2ext,
bbb3f730	12	)
f843300f S	13
	14
	15	class OnionStudiosIE(InfoExtractor):
	16	_VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
	17
	18	_TESTS = [{
	19	'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
6c26815d	20	'md5': 'e49f947c105b8a78a675a0ee1bddedfe',
f843300f S	21	'info_dict': {
	22	'id': '2937',
	23	'ext': 'mp4',
	24	'title': 'Hannibal charges forward, stops for a cocktail',
ec85ded8	25	'thumbnail': r're:^https?://.*\.jpg$',
f843300f	26	'uploader': 'The A.V. Club',
6c26815d	27	'uploader_id': 'the-av-club',
f843300f S	28	},
	29	}, {
	30	'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
	31	'only_matching': True,
	32	}]
	33
d4f58034 S	34	@staticmethod
	35	def _extract_url(webpage):
	36	mobj = re.search(
	37	r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/embed.+?)\1', webpage)
	38	if mobj:
	39	return mobj.group('url')
	40
f843300f S	41	def _real_extract(self, url):
	42	video_id = self._match_id(url)
	43
6c26815d RA	44	video_data = self._download_json(
	45	'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
	46
	47	title = video_data['title']
f843300f S	48
f843300f S	49	formats = []
6c26815d RA	50	for source in video_data.get('sources', []):
	51	source_url = source.get('url')
	52	if not source_url:
	53	continue
ab49d7a9 RA	54	ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
ab49d7a9 RA	55	if ext == 'm3u8':
bbb3f730	56	formats.extend(self._extract_m3u8_formats(
6c26815d	57	source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
bbb3f730	58	else:
6c26815d	59	tbr = int_or_none(source.get('bitrate'))
f843300f	60	formats.append({
6c26815d RA	61	'format_id': ext + ('-%d' % tbr if tbr else ''),
	62	'url': source_url,
	63	'width': int_or_none(source.get('width')),
	64	'tbr': tbr,
bbb3f730	65	'ext': ext,
f843300f S	66	})
	67	self._sort_formats(formats)
	68
f843300f S	69	return {
	70	'id': video_id,
	71	'title': title,
6c26815d RA	72	'thumbnail': video_data.get('poster_url'),
	73	'uploader': video_data.get('channel_name'),
	74	'uploader_id': video_data.get('channel_slug'),
	75	'duration': float_or_none(video_data.get('duration', 1000)),
	76	'tags': video_data.get('tags'),
f843300f S	77	'formats': formats,
f843300f S	78	}