]>
Commit | Line | Data |
---|---|---|
3798eadc PH |
1 | from __future__ import unicode_literals |
2 | ||
5fe3a3c3 PH |
3 | import json |
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
5fe3a3c3 PH |
8 | unified_strdate, |
9 | ) | |
10 | ||
11 | ||
class ArchiveOrgIE(InfoExtractor):
    """Information extractor for archive.org ``/details/<id>`` video pages.

    The page URL is re-requested with ``output=json`` and the resulting JSON
    document is used to build the info dict (title, formats, uploader, ...).
    """

    IE_NAME = 'archive.org'
    IE_DESC = 'archive.org videos'
    _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
    _TEST = {
        "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
        'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
        'md5': '8af1d4cf447933ed3c7f4871162602db',
        'info_dict': {
            "title": "1968 Demo - FJCC Conference Presentation Reel #1",
            "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
            "upload_date": "19681210",
            "uploader": "SRI International"
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the archive.org item addressed by *url*.

        *url* must match ``_VALID_URL``. The JSON variant of the page is
        downloaded and parsed; every entry of its ``files`` mapping whose
        ``format`` contains ``'Video'`` becomes one format.

        Returns a dict with ``_type`` ``'video'`` and the keys ``id``,
        ``title``, ``formats``, ``description``, ``uploader``,
        ``upload_date`` and ``thumbnail``. ``description``, ``uploader``,
        ``upload_date`` and ``thumbnail`` are ``None`` when the JSON document
        does not provide them.

        Raises ``KeyError`` if the JSON document lacks ``metadata``/``title``,
        ``files``, ``server`` or ``dir``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Extend an existing query string with '&'; otherwise start one with
        # '?'. (The separators used to be swapped, yielding '...&output=json'
        # for plain URLs and '...??output=json' for URLs with a query.)
        json_url = url + ('&' if '?' in url else '?') + 'output=json'
        json_data = self._download_webpage(json_url, video_id)
        data = json.loads(json_data)

        metadata = data['metadata']

        def first_value(field):
            # Metadata values are indexed as sequences; use the first entry
            # and tolerate a missing or empty field.
            values = metadata.get(field)
            return values[0] if values else None

        # The title is mandatory; the remaining fields are optional.
        title = metadata['title'][0]
        description = first_value('description')
        uploader = first_value('creator')
        date = first_value('date')
        upload_date = unified_strdate(date) if date else None

        formats = [
            {
                'format': fdata['format'],
                'url': 'http://' + data['server'] + data['dir'] + fn,
                'file_size': int(fdata['size']),
            }
            for fn, fdata in data['files'].items()
            # Keep only the files whose format label marks them as video.
            if 'Video' in fdata['format']]

        self._sort_formats(formats)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
            'thumbnail': data.get('misc', {}).get('image'),
        }