]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/videa.py
[videa] Add extractor
[yt-dlp.git] / youtube_dl / extractor / videa.py
CommitLineData
e7460215
B
1# coding: utf-8
2from __future__ import unicode_literals
3
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_duration,
    xpath_element,
    xpath_text,
    xpath_attr,
    urlencode_postdata,
    unescapeHTML,
)
16
17
class VideaIE(InfoExtractor):
    """Extractor for videa.hu video pages (``/videok/...`` URLs).

    The page URL is resolved to the embedded player URL through the site's
    oEmbed endpoint; the actual media extraction is delegated to the
    ``VideaEmbed`` extractor via a ``url_transparent`` result.
    """
    _VALID_URL = r'https?://(?:.+?\.)?videa\.hu/videok/(?P<id>[^#?]+)'
    _TESTS = [{
        'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
        'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
        'info_dict': {
            'id': '8YfIAjxwWGwT8HVQ',
            'display_id': '8YfIAjxwWGwT8HVQ',
            'ext': 'mp4',
            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
            'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
            'duration': 21,
        },
    }, {
        'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a videa.hu page URL to its player URL.

        Returns a ``url_transparent`` result pointing at the embed player,
        to be handled by the ``VideaEmbed`` extractor.
        """
        video_id = self._match_id(url)

        # urlencode_postdata() yields ASCII bytes (it is meant for POST
        # bodies); decode it so it can be appended to a text URL without a
        # str/bytes TypeError on Python 3.
        oembed_query = urlencode_postdata({
            'url': url.split('?')[0],
            'format': 'json',
        }).decode('ascii')
        video_data = self._download_json(
            'http://videa.hu/oembed/?' + oembed_query, video_id)

        # Fall back to an empty string so that a response without 'html'
        # produces a regular "unable to extract" error, not a TypeError.
        # The src attribute comes from HTML markup, hence the entity decoding.
        video_url = unescapeHTML(self._search_regex(
            r'src="(.+?)"', video_data.get('html') or '', 'embed url'))

        return {
            '_type': 'url_transparent',
            'url': video_url,
            'ie_key': 'VideaEmbed',
        }
48
class VideaEmbedIE(InfoExtractor):
    """Extractor for videa.hu embedded player URLs (``/player?v=`` and ``/player/v/``).

    Media information is read from the XML document served by
    ``flvplayer_get_video_xml.php`` on the same host as the player.
    """
    _VALID_URL = r'(?P<protocol>https?:)(?P<baseurl>//(?:.+?\.)?videa\.hu)/player(?:\?v=|/v/)(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
        'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
        'info_dict': {
            'id': '8YfIAjxwWGwT8HVQ',
            'ext': 'mp4',
            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
            'thumbnail': 'http://videa.hu/static/still/1.4.1.1007274.1204470.3',
            'duration': 21
        },
    }, {
        'url': 'http://videa.hu/player?v=jAHDWfWSJH5XuFhH',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the URL of the first videa.hu player iframe in *webpage*.

        Returns None when no such iframe is present. The returned URL may be
        protocol-relative (start with ``//``), since the pattern accepts that.
        """
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:.+?\.)?videa\.hu/player(?:\?v=|/v/)[^/#?]+)\1',
            webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Download the player XML and build the info dict for the media.

        Raises ExtractorError when the XML has no ``video`` or ``audio``
        entries; a missing media URL, title or still is fatal as well.
        """
        # _VALID_URL is an anchored pattern, so match from the start of the
        # URL rather than searching anywhere inside it.
        protocol, base_url, display_id = re.match(self._VALID_URL, url).groups()
        xml = self._download_xml(
            protocol + base_url + '/flvplayer_get_video_xml.php?v=' + display_id,
            display_id)

        medias = []

        for xml_media in xml.findall('video') + xml.findall('audio'):
            # The media URL is mandatory; fail with a clear extractor error
            # instead of a TypeError from concatenating with None.
            # _proto_relative_url only prefixes the scheme when the value
            # starts with '//', so absolute URLs are left intact.
            media_url = self._proto_relative_url(
                xpath_attr(
                    xml_media, 'versions/version', 'video_url',
                    'video url', fatal=True),
                protocol)
            thumbnail = self._proto_relative_url(
                xpath_text(xml_media, 'still', 'still', True), protocol)
            medias.append({
                'id': display_id,
                'ext': 'mp4',
                'title': xpath_text(xml_media, 'title', 'title', True),
                'duration': parse_duration(xpath_text(xml_media, 'duration')),
                'thumbnail': thumbnail,
                'url': media_url,
            })

        if len(medias) > 1:
            self._downloader.report_warning(
                'found multiple medias; please '
                'report this with the video URL to http://yt-dl.org/bug')
        if not medias:
            raise ExtractorError('No media entries found')
        # Only the first entry is returned; any others are just reported above.
        return medias[0]