[yt-dlp.git] / youtube_dl / extractor / streamcloud.py

# coding: utf-8
import re
import time

from .common import InfoExtractor
from ..utils import (
    compat_urllib_parse,
    compat_urllib_request,
)


class StreamcloudIE(InfoExtractor):
    """Extractor for videos hosted on streamcloud.eu.

    The page at the video URL is first fetched to collect its hidden/submit
    form inputs; those are then POSTed back to the same URL, and the media
    URL and metadata are scraped from the response.
    """

    IE_NAME = u'streamcloud.eu'
    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'

    _TEST = {
        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
        u'file': u'skp9j99s4bpz.mp4',
        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
        u'info_dict': {
            u'title': u'youtube-dl test video  \'/\\ ä ↭',
            u'duration': 9,
        },
        u'skip': u'Only available from the EU'
    }

    def _real_extract(self, url):
        """Return the info dict for the video at ``url``.

        ``url`` is expected to match ``_VALID_URL``; the ``id`` group of that
        pattern becomes the video id.

        The returned dict has the keys ``id``, ``title``, ``url``,
        ``duration`` and ``thumbnail``. Title and video URL are required
        lookups; duration and thumbnail are looked up with ``fatal=False``,
        and ``duration`` is ``None`` when that lookup yields ``None``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        orig_webpage = self._download_webpage(url, video_id)

        # Collect the (name, value) pairs of every hidden/submit <input> so
        # the form can be replayed as a POST request.
        fields = re.findall(r'''(?x)<input\s+
            type="(?:hidden|submit)"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', orig_webpage)
        # urlencode() returns text, but on Python 3 a request body has to be
        # bytes. The encoded form is percent-escaped and therefore pure ASCII,
        # so this is a no-op on Python 2.
        post = compat_urllib_parse.urlencode(fields).encode('ascii')

        self.to_screen('%s: Waiting for timeout' % video_id)
        # NOTE(review): presumably the site rejects the form when it is
        # submitted before its countdown has elapsed -- confirm the required
        # delay against the live page.
        time.sleep(12)
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }
        req = compat_urllib_request.Request(url, post, headers)

        webpage = self._download_webpage(
            req, video_id, note=u'Downloading video page ...')
        title = self._html_search_regex(
            r'<h1[^>]*>([^<]+)<', webpage, u'title')
        video_url = self._search_regex(
            r'file:\s*"([^"]+)"', webpage, u'video URL')
        duration_str = self._search_regex(
            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
        duration = None if duration_str is None else int(duration_str)
        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'duration': duration,
            'thumbnail': thumbnail,
        }
Commit	Line	Data
02e4ebbb PH	1	# coding: utf-8
	2	import re
	3	import time
	4
	5	from .common import InfoExtractor
	6	from ..utils import (
	7	compat_urllib_parse,
	8	compat_urllib_request,
	9	)
	10
	11
	12	class StreamcloudIE(InfoExtractor):
	13	IE_NAME = u'streamcloud.eu'
	14	_VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'
	15
	16	_TEST = {
	17	u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
	18	u'file': u'skp9j99s4bpz.mp4',
	19	u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
	20	u'info_dict': {
	21	u'title': u'youtube-dl test video \'/\\ ä ↭',
	22	u'duration': 9,
	23	},
e5c146d5	24	u'skip': u'Only available from the EU'
02e4ebbb PH	25	}
	26
	27	def _real_extract(self, url):
	28	mobj = re.match(self._VALID_URL, url)
	29	video_id = mobj.group('id')
	30
	31	orig_webpage = self._download_webpage(url, video_id)
	32
	33	fields = re.findall(r'''(?x)<input\s+
	34	type="(?:hidden|submit)"\s+
	35	name="([^"]+)"\s+
	36	(?:id="[^"]+"\s+)?
	37	value="([^"]*)"
	38	''', orig_webpage)
	39	post = compat_urllib_parse.urlencode(fields)
	40
	41	self.to_screen('%s: Waiting for timeout' % video_id)
	42	time.sleep(12)
	43	headers = {
	44	b'Content-Type': b'application/x-www-form-urlencoded',
	45	}
	46	req = compat_urllib_request.Request(url, post, headers)
	47
	48	webpage = self._download_webpage(
	49	req, video_id, note=u'Downloading video page ...')
	50	title = self._html_search_regex(
	51	r'<h1[^>]*>([^<]+)<', webpage, u'title')
	52	video_url = self._search_regex(
	53	r'file:\s*"([^"]+)"', webpage, u'video URL')
	54	duration_str = self._search_regex(
	55	r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
	56	duration = None if duration_str is None else int(duration_str)
	57	thumbnail = self._search_regex(
	58	r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)
	59
	60	return {
	61	'id': video_id,
	62	'title': title,
	63	'url': video_url,
	64	'duration': duration,
	65	'thumbnail': thumbnail,
	66	}