]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | ||
3 | from .common import InfoExtractor | |
4 | ||
class BRIE(InfoExtractor):
    """Extractor for videos hosted in the Bayerischer Rundfunk Mediathek.

    The video page embeds a player setup call that points at an XML
    document; that document carries the metadata, the downloadable assets
    and the teaser images for the video.
    """

    IE_DESC = u"Bayerischer Rundfunk Mediathek"
    _VALID_URL = r"^https?://(?:www\.)?br\.de/mediathek/video/(?:sendungen/)?(?:[a-z0-9\-]+\.html)$"
    _BASE_URL = u"http://www.br.de"

    _TESTS = [
        {
            u"url": u"http://www.br.de/mediathek/video/anselm-gruen-114.html",
            u"file": u"2c8d81c5-6fb7-4a74-88d4-e768e5856532.mp4",
            u"md5": u"c4f83cf0f023ba5875aba0bf46860df2",
            u"info_dict": {
                u"title": u"Feiern und Verzichten",
                u"description": u"Anselm Grün: Feiern und Verzichten",
                u"uploader": u"BR/Birgit Baier",
                u"upload_date": u"20140301"
            }
        }
    ]

    def _real_extract(self, url):
        """Return the info dict of the first video described by ``url``.

        Downloads the web page, locates the player's ``dataURL`` in it,
        downloads the XML document behind that URL and converts its first
        ``video`` element into an info dict. A warning is reported when the
        document describes more than one video.

        Raises IndexError when the XML document has no ``video`` element.
        """
        page = self._download_webpage(url, None)
        # The XML location is only available as an argument of the inline
        # player setup call; it is a path relative to _BASE_URL.
        xml_url = self._search_regex(
            r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/mediathek/video/[a-z0-9/~_.-]+)'}\)\);",
            page, "XMLURL")
        xml = self._download_xml(self._BASE_URL + xml_url, None)

        videos = []
        for xml_video in xml.findall("video"):
            formats = self._extract_formats(xml_video.find("assets"))
            thumbnails = self._extract_thumbnails(
                xml_video.find("teaserImage/variants"))
            video = {
                "id": xml_video.get("externalId"),
                "title": xml_video.find("title").text,
                "formats": formats,
                "thumbnails": thumbnails,
                # Collapse line breaks inside the share title into spaces.
                "description": " ".join(
                    xml_video.find("shareTitle").text.splitlines()),
                "uploader": xml_video.find("author").text,
                # Reverse the dot-separated date parts and join them
                # (presumably DD.MM.YYYY -> YYYYMMDD, as the test case
                # expects "20140301").
                "upload_date": "".join(
                    reversed(xml_video.find("broadcastDate").text.split("."))),
                "webpage_url": xml_video.find("permalink").text,
            }
            # Thumbnails are sorted largest first; a video without any
            # teaser image simply gets no "thumbnail" entry.
            if thumbnails:
                video["thumbnail"] = thumbnails[0]["url"]
            videos.append(video)

        if len(videos) > 1:
            # Adjacent literals are concatenated verbatim, so the separating
            # space has to be part of the first literal.
            self._downloader.report_warning(
                u'found multiple videos; please '
                u'report this with the video URL to http://yt-dl.org/bug')
        return videos[0]

    def _extract_formats(self, assets):
        """Build the list of format dicts from an ``assets`` element.

        Assets without a ``downloadUrl`` child are skipped. The resulting
        list is passed through ``self._sort_formats`` before being returned.
        """
        vformats = []
        for asset in assets.findall("asset"):
            download_url = asset.find("downloadUrl")
            if download_url is None:
                continue
            width = int(asset.find("frameWidth").text)
            height = int(asset.find("frameHeight").text)
            vformat = {
                "url": download_url.text,
                "ext": asset.find("mediaType").text,
                "format_id": asset.get("type"),
                "width": width,
                "height": height,
                "resolution": "%ix%i" % (width, height),
                "tbr": int(asset.find("bitrateVideo").text),
                "abr": int(asset.find("bitrateAudio").text),
                "vcodec": asset.find("codecVideo").text,
                "filesize": int(asset.find("size").text),
                "preference": -1,
                "quality": -1,
            }
            # The container is reported as the same value as the extension.
            vformat["container"] = vformat["ext"]
            vformat["format"] = "%s container with %i Kbps %s" % (
                vformat["container"], vformat["tbr"], vformat["vcodec"])
            vformats.append(vformat)
        self._sort_formats(vformats)
        return vformats

    def _extract_thumbnails(self, variants):
        """Build the list of thumbnail dicts from a ``variants`` element.

        Returns the thumbnails sorted by pixel area, largest first. When
        ``variants`` is None (the video has no teaser image variants) an
        empty list is returned.
        """
        if variants is None:
            return []
        thumbnails = []
        for variant in variants.findall("variant"):
            width = int(variant.find("width").text)
            height = int(variant.find("height").text)
            thumbnails.append({
                # Thumbnail URLs in the XML are relative to _BASE_URL.
                "url": self._BASE_URL + variant.find("url").text,
                "width": width,
                "height": height,
                "resolution": "%ix%i" % (width, height),
            })
        thumbnails.sort(key=lambda x: x["width"] * x["height"], reverse=True)
        return thumbnails