]>
Commit | Line | Data |
---|---|---|
94c4abce S |
1 | from __future__ import unicode_literals |
2 | ||
94c4abce | 3 | from .common import InfoExtractor |
1cc79574 | 4 | from ..compat import ( |
94c4abce S |
5 | compat_urllib_request, |
6 | compat_urllib_parse, | |
7 | ) | |
8 | ||
9 | ||
class NFBIE(InfoExtractor):
    """Extractor for film pages on nfb.ca / onf.ca.

    The film page is scraped for the director (reported as uploader), and
    the player configuration XML is fetched for the title, description,
    duration, poster image and RTMP stream variants.
    """

    IE_NAME = 'nfb'
    IE_DESC = 'National Film Board of Canada'
    _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'

    _TEST = {
        'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
        'info_dict': {
            'id': 'qallunaat_why_white_people_are_funny',
            'ext': 'mp4',
            'title': 'Qallunaat! Why White People Are Funny ',
            'description': 'md5:836d8aff55e087d04d9f6df554d4e038',
            'duration': 3128,
            'uploader': 'Mark Sandiford',
            'uploader_id': 'mark-sandiford',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the film referenced by ``url``.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``uploader``, ``uploader_id`` and
        ``formats``. Metadata that is absent from the page or from the
        player config is reported as ``None`` instead of raising.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(
            'https://www.nfb.ca/film/%s' % video_id, video_id,
            'Downloading film page')

        # Director details are optional, hence fatal=False.
        uploader_id = self._html_search_regex(
            r'<a class="director-link" href="/explore-all-directors/([^/]+)/"',
            page, 'director id', fatal=False)
        uploader = self._html_search_regex(
            r'<em class="director-name" itemprop="name">([^<]+)</em>',
            page, 'director name', fatal=False)

        # The player config is requested with a form-encoded POST body and
        # the referer header of the Flash player.
        request = compat_urllib_request.Request(
            'https://www.nfb.ca/film/%s/player_config' % video_id,
            compat_urllib_parse.urlencode({'getConfig': 'true'}).encode('ascii'))
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf')

        config = self._download_xml(request, video_id, 'Downloading player config XML')

        def find_text(node, path):
            """Return the text of the first ``path`` match under ``node``, or None."""
            found = node.find(path)
            # Compare against None explicitly: an Element without children
            # is falsy, so a plain truth test would discard valid leaf nodes.
            return found.text if found is not None else None

        def parse_duration(value):
            """Convert the ``duration`` attribute to int; None if missing or malformed."""
            try:
                return int(value)
            except (TypeError, ValueError):
                return None

        def extract_thumbnail(media):
            """Pick the 'high' quality poster URL, else any available one, else None."""
            thumbnails = {}
            for asset in media.findall('assets/asset'):
                thumb_url = find_text(asset, 'default/url')
                if thumb_url:
                    thumbnails[asset.get('quality')] = thumb_url
            if not thumbnails:
                return None
            if 'high' in thumbnails:
                return thumbnails['high']
            return next(iter(thumbnails.values()))

        title = None
        description = None
        thumbnail = None
        duration = None
        formats = []

        for media in config.findall('./player/stream/media'):
            media_type = media.get('type')
            if media_type == 'posterImage':
                thumbnail = extract_thumbnail(media)
            elif media_type == 'video':
                duration = parse_duration(media.get('duration'))
                title = find_text(media, 'title')
                description = find_text(media, 'description')
                # It seems assets always go from lower to better quality, so no need to sort
                for asset in media.findall('assets/asset'):
                    for variant in asset:
                        streamer_uri = find_text(variant, 'streamerURI')
                        play_path = find_text(variant, 'url')
                        if not streamer_uri or not play_path:
                            # Not a usable stream description; skip it
                            # rather than abort the whole extraction.
                            continue
                        fmt = {
                            'url': streamer_uri,
                            'play_path': play_path,
                            'rtmp_live': False,
                            'ext': 'mp4',
                            'format_id': '%s-%s' % (variant.tag, asset.get('quality')),
                        }
                        # 'app' is everything after the third slash of the
                        # streamer URI; omit it when the URI has no such part.
                        uri_parts = streamer_uri.split('/', 3)
                        if len(uri_parts) > 3:
                            fmt['app'] = uri_parts[3]
                        formats.append(fmt)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }