]>
Commit | Line | Data |
---|---|---|
3798eadc PH |
1 | from __future__ import unicode_literals |
2 | ||
5fe3a3c3 | 3 | from .common import InfoExtractor |
e8e28989 | 4 | from ..utils import unified_strdate |
5fe3a3c3 PH |
5 | |
6 | ||
class ArchiveOrgIE(InfoExtractor):
    """Information extractor for items served at archive.org/details/<id>."""

    IE_NAME = 'archive.org'
    IE_DESC = 'archive.org videos'
    _VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
    _TESTS = [{
        'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
        'md5': '8af1d4cf447933ed3c7f4871162602db',
        'info_dict': {
            'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
            'ext': 'ogv',
            'title': '1968 Demo - FJCC Conference Presentation Reel #1',
            'description': 'md5:1780b464abaca9991d8968c877bb53ed',
            'upload_date': '19681210',
            'uploader': 'SRI International'
        }
    }, {
        'url': 'https://archive.org/details/Cops1922',
        'md5': '18f2a19e6d89af8425671da1cf3d4e04',
        'info_dict': {
            'id': 'Cops1922',
            'ext': 'ogv',
            'title': 'Buster Keaton\'s "Cops" (1922)',
            'description': 'md5:70f72ee70882f713d4578725461ffcc3',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the item addressed by ``url``.

        The details page is requested again with ``output=json`` appended and
        the resulting document supplies the metadata and the file listing.
        Only files whose ``format`` string contains ``'Video'`` are offered
        as formats.

        Raises KeyError if the JSON document lacks the ``metadata``,
        ``files``, ``server`` or ``dir`` entries, or if a listed video file
        lacks ``format`` or ``size``.
        """
        video_id = self._match_id(url)

        # Respect a query string that may already be part of the URL.
        json_url = url + ('&' if '?' in url else '?') + 'output=json'
        data = self._download_json(json_url, video_id)

        def get_optional(data_dict, field):
            # Metadata values are indexed as sequences and only the first
            # entry is used. A missing field or an empty sequence gives None
            # instead of raising IndexError.
            values = data_dict['metadata'].get(field)
            return values[0] if values else None

        title = get_optional(data, 'title')
        description = get_optional(data, 'description')
        uploader = get_optional(data, 'creator')
        upload_date = unified_strdate(get_optional(data, 'date'))

        formats = [
            {
                'format': fdata['format'],
                'url': 'http://' + data['server'] + data['dir'] + fn,
                # 'filesize' is the key the format dict convention defines;
                # the previous 'file_size' spelling was silently ignored.
                'filesize': int(fdata['size']),
            }
            for fn, fdata in data['files'].items()
            if 'Video' in fdata['format']]

        self._sort_formats(formats)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
            'thumbnail': data.get('misc', {}).get('image'),
        }