]>
Commit | Line | Data |
---|---|---|
94c4abce S |
1 | from __future__ import unicode_literals |
2 | ||
94c4abce | 3 | from .common import InfoExtractor |
6e6bc8da | 4 | from ..utils import ( |
11e6a0b6 S |
5 | clean_html, |
6 | determine_ext, | |
7 | int_or_none, | |
8 | qualities, | |
6e6bc8da | 9 | urlencode_postdata, |
11e6a0b6 | 10 | xpath_text, |
6e6bc8da | 11 | ) |
94c4abce S |
12 | |
13 | ||
class NFBIE(InfoExtractor):
    """Extractor for film pages on nfb.ca / onf.ca.

    Title, description, duration, thumbnails, formats and subtitles are all
    read from the film's ``player_config`` XML document.
    """
    IE_NAME = 'nfb'
    IE_DESC = 'National Film Board of Canada'
    _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
        'info_dict': {
            'id': 'qallunaat_why_white_people_are_funny',
            'ext': 'flv',
            'title': 'Qallunaat! Why White People Are Funny ',
            'description': 'md5:6b8e32dde3abf91e58857b174916620c',
            'duration': 3128,
            'creator': 'Mark Sandiford',
            'uploader': 'Mark Sandiford',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the film identified by *url*.

        The film id is taken from the URL, the player config XML is fetched
        with a POST request, and every ``./player/stream/media`` element is
        inspected: ``posterImage`` entries supply thumbnails, ``video``
        entries supply the title, formats, description, uploader, duration
        and subtitles.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnails``, ``duration``, ``creator``, ``uploader``, ``formats``
        and ``subtitles``.
        """
        video_id = self._match_id(url)

        config = self._download_xml(
            'https://www.nfb.ca/film/%s/player_config' % video_id,
            video_id, 'Downloading player config XML',
            data=urlencode_postdata({'getConfig': 'true'}),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
            })

        title = description = duration = uploader = None
        # Two distinct list objects on purpose: ``[[]] * 2`` would bind both
        # names to the very same list, so formats appended below would also
        # leak into ``thumbnails`` whenever no posterImage media is present.
        thumbnails = []
        formats = []
        subtitles = {}

        for media in config.findall('./player/stream/media'):
            media_type = media.get('type')
            if media_type == 'posterImage':
                quality_key = qualities(('low', 'high'))
                # Each posterImage entry replaces any previously collected
                # thumbnails rather than extending them.
                thumbnails = []
                for asset in media.findall('assets/asset'):
                    asset_url = xpath_text(asset, 'default/url', default=None)
                    if not asset_url:
                        continue
                    quality = asset.get('quality')
                    thumbnails.append({
                        'url': asset_url,
                        'id': quality,
                        'preference': quality_key(quality),
                    })
            elif media_type == 'video':
                # The title lookup is requested with fatal=True; the other
                # metadata lookups below use the helper's defaults.
                title = xpath_text(media, 'title', fatal=True)
                for asset in media.findall('assets/asset'):
                    quality = asset.get('quality')
                    # Qualities such as "720p" carry the frame height.
                    height = int_or_none(self._search_regex(
                        r'^(\d+)[pP]$', quality or '', 'height', default=None))
                    for node in asset:
                        streamer = xpath_text(node, 'streamerURI', default=None)
                        if not streamer:
                            continue
                        play_path = xpath_text(node, 'url', default=None)
                        if not play_path:
                            continue
                        # scheme://host/app -> ['scheme:', '', 'host', 'app'].
                        # A streamer URI without an application path cannot
                        # be indexed at [3]; skip it instead of crashing.
                        streamer_parts = streamer.split('/', 3)
                        if len(streamer_parts) < 4:
                            continue
                        formats.append({
                            'url': streamer,
                            'app': streamer_parts[3],
                            'play_path': play_path,
                            'rtmp_live': False,
                            'ext': 'flv',
                            'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
                            'height': height,
                        })
                self._sort_formats(formats)
                description = clean_html(xpath_text(media, 'description'))
                uploader = xpath_text(media, 'author')
                duration = int_or_none(media.get('duration'))
                for subtitle in media.findall('./subtitles/subtitle'):
                    subtitle_url = xpath_text(subtitle, 'url', default=None)
                    if not subtitle_url:
                        continue
                    lang = xpath_text(subtitle, 'lang', default='en')
                    subtitles.setdefault(lang, []).append({
                        'url': subtitle_url,
                        # Prefer the declared format attribute; otherwise
                        # derive the extension from the URL.
                        'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
                    })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'duration': duration,
            # The config's <author> value is exposed under both keys.
            'creator': uploader,
            'uploader': uploader,
            'formats': formats,
            'subtitles': subtitles,
        }