]>
Commit | Line | Data |
---|---|---|
94c4abce S |
1 | from __future__ import unicode_literals |
2 | ||
94c4abce | 3 | from .common import InfoExtractor |
6e6bc8da S |
4 | from ..utils import ( |
5 | sanitized_Request, | |
6 | urlencode_postdata, | |
7 | ) | |
94c4abce S |
8 | |
9 | ||
class NFBIE(InfoExtractor):
    """Extractor for film pages on nfb.ca / onf.ca."""

    IE_NAME = 'nfb'
    IE_DESC = 'National Film Board of Canada'
    _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
        'info_dict': {
            'id': 'qallunaat_why_white_people_are_funny',
            'ext': 'mp4',
            'title': 'Qallunaat! Why White People Are Funny ',
            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
            'duration': 3128,
            'uploader': 'Mark Sandiford',
            'uploader_id': 'mark-sandiford',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the film addressed by *url*.

        Downloads the film page (for the director name/id) and the player
        config XML (for title, description, duration, thumbnail and the
        RTMP formats).  Metadata that is missing or malformed in the config
        is reported as ``None`` and format variants lacking a usable
        streamer URI or play path are skipped, rather than aborting the
        whole extraction with an AttributeError/TypeError/IndexError.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(
            'https://www.nfb.ca/film/%s' % video_id, video_id,
            'Downloading film page')

        uploader_id = self._html_search_regex(
            r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
            page, 'director id', fatal=False)
        uploader = self._html_search_regex(
            r'<em class="director-name" itemprop="name">([^<]+)</em>',
            page, 'director name', fatal=False)

        # The player config is requested with a POST body and a custom
        # referer header naming the Flash player.
        request = sanitized_Request(
            'https://www.nfb.ca/film/%s/player_config' % video_id,
            urlencode_postdata({'getConfig': 'true'}))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

        config = self._download_xml(request, video_id, 'Downloading player config XML')

        title = None
        description = None
        thumbnail = None
        duration = None
        formats = []

        def child_text(node, path):
            # Text of the first element matching *path*, or None when the
            # element is absent (instead of failing on ``None.text``).
            found = node.find(path)
            return found.text if found is not None else None

        def extract_thumbnail(media):
            # Map quality -> URL, prefer 'high', otherwise take any entry.
            thumbnails = {}
            for asset in media.findall('assets/asset'):
                thumbnail_url = child_text(asset, 'default/url')
                if thumbnail_url:
                    thumbnails[asset.get('quality')] = thumbnail_url
            if not thumbnails:
                return None
            if 'high' in thumbnails:
                return thumbnails['high']
            return list(thumbnails.values())[0]

        for media in config.findall('./player/stream/media'):
            media_type = media.get('type')
            if media_type == 'posterImage':
                thumbnail = extract_thumbnail(media)
            elif media_type == 'video':
                try:
                    duration = int(media.get('duration'))
                except (TypeError, ValueError):
                    # Attribute missing or not an integer.
                    duration = None
                title = child_text(media, 'title')
                description = child_text(media, 'description')
                # It seems assets always go from lower to better quality, so no need to sort
                for asset in media.findall('assets/asset'):
                    for x in asset:
                        streamer_uri = child_text(x, 'streamerURI')
                        play_path = child_text(x, 'url')
                        if not streamer_uri or not play_path:
                            continue
                        # scheme://host/app -> the app is everything after
                        # the third slash; skip URIs without an app part.
                        uri_parts = streamer_uri.split('/', 3)
                        if len(uri_parts) < 4:
                            continue
                        formats.append({
                            'url': streamer_uri,
                            'app': uri_parts[3],
                            'play_path': play_path,
                            'rtmp_live': False,
                            'ext': 'mp4',
                            'format_id': '%s-%s' % (x.tag, asset.get('quality')),
                        })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }