]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/streamcloud.py
release 2016.06.20
[yt-dlp.git] / youtube_dl / extractor / streamcloud.py
CommitLineData
02e4ebbb 1# coding: utf-8
71aa656d
S
2from __future__ import unicode_literals
3
02e4ebbb 4import re
02e4ebbb
PH
5
6from .common import InfoExtractor
6e6bc8da 7from ..utils import (
84dcd1c4 8 ExtractorError,
6e6bc8da
S
9 sanitized_Request,
10 urlencode_postdata,
11)
02e4ebbb
PH
12
13
class StreamcloudIE(InfoExtractor):
    """Information extractor for videos hosted on streamcloud.eu.

    The landing page of a video contains a form made of hidden/submit
    inputs; posting that form back to the same URL yields the page that
    embeds the actual media URL.
    """

    IE_NAME = 'streamcloud.eu'
    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'

    _TESTS = [{
        'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
        'md5': '6bea4c7fa5daaacc2a946b7146286686',
        'info_dict': {
            'id': 'skp9j99s4bpz',
            'ext': 'mp4',
            'title': 'youtube-dl test video \'/\\ ä ↭',
        },
        'skip': 'Only available from the EU',
    }, {
        'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, url, thumbnail) for ``url``.

        Raises ExtractorError (flagged as expected) when the landing page
        contains the '>File Not Found<' marker.
        """
        video_id = self._match_id(url)
        # Work on the canonical short URL; any file-name suffix is dropped.
        page_url = 'http://streamcloud.eu/%s' % video_id

        landing_page = self._download_webpage(page_url, video_id)
        if '>File Not Found<' in landing_page:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        # Collect (name, value) pairs of every hidden/submit <input>; they
        # make up the body of the POST request sent below.
        form_fields = re.findall(r'''(?x)<input\s+
            type="(?:hidden|submit)"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', landing_page)
        payload = urlencode_postdata(form_fields)

        # NOTE(review): presumably the site enforces a countdown before the
        # form may be submitted - confirm the required delay.
        self._sleep(12, video_id)

        request = sanitized_Request(
            page_url, payload,
            {b'Content-Type': b'application/x-www-form-urlencoded'})
        video_page = self._download_webpage(
            request, video_id, note='Downloading video page ...')

        # Title and media URL are looked up in this order; the thumbnail
        # lookup is non-fatal.
        info = {'id': video_id}
        info['title'] = self._html_search_regex(
            r'<h1[^>]*>([^<]+)<', video_page, 'title')
        info['url'] = self._search_regex(
            r'file:\s*"([^"]+)"', video_page, 'video URL')
        info['thumbnail'] = self._search_regex(
            r'image:\s*"([^"]+)"', video_page, 'thumbnail URL', fatal=False)
        return info