]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/archiveorg.py
[xhamster] Fix and improve
[yt-dlp.git] / youtube_dl / extractor / archiveorg.py
CommitLineData
3798eadc
PH
1from __future__ import unicode_literals
2
5fe3a3c3
PH
3import json
4import re
5
6from .common import InfoExtractor
7from ..utils import (
5fe3a3c3
PH
8 unified_strdate,
9)
10
11
class ArchiveOrgIE(InfoExtractor):
    """Information extractor for archive.org ``/details/<id>`` item pages.

    The item's JSON description is requested by adding ``output=json`` to
    the page URL; every file whose format name contains ``Video`` is
    reported as a downloadable format.
    """

    IE_NAME = 'archive.org'
    IE_DESC = 'archive.org videos'
    _VALID_URL = r'(?:https?://)?(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
    _TEST = {
        "url": "http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect",
        'file': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv',
        'md5': '8af1d4cf447933ed3c7f4871162602db',
        'info_dict': {
            "title": "1968 Demo - FJCC Conference Presentation Reel #1",
            "description": "Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also <a href=\"http://dougengelbart.org/firsts/dougs-1968-demo.html\" rel=\"nofollow\">Doug's 1968 Demo page</a> for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | <a href=\"http://www.archive.org/details/XD300-24_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 2</a> | <a href=\"http://www.archive.org/details/XD300-25_68HighlightsAResearchCntAugHumanIntellect\" rel=\"nofollow\">Reel 3</a>",
            "upload_date": "19681210",
            "uploader": "SRI International"
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the archive.org item at *url*.

        Downloads the item's JSON description, reads the metadata fields
        (each is indexed with ``[0]``, i.e. treated as a list of values)
        and collects one format entry per file whose ``format`` contains
        ``Video``.

        Returns a dict with the keys ``_type``, ``id``, ``title``,
        ``formats``, ``description``, ``uploader``, ``upload_date`` and
        ``thumbnail``.

        Raises ``KeyError`` when the JSON lacks ``metadata``/``title``,
        ``files``, ``server`` or ``dir``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Append the parameter with '&' when the URL already carries a
        # query string and with '?' otherwise.  (The two separators used
        # to be swapped, which yielded '...details/<id>&output=json'.)
        json_url = url + ('&' if '?' in url else '?') + 'output=json'
        json_data = self._download_webpage(json_url, video_id)
        data = json.loads(json_data)

        metadata = data['metadata']

        def first_value(field):
            # Optional metadata: return the first listed value, or None
            # when the field is absent or empty instead of raising.
            values = metadata.get(field) or [None]
            return values[0]

        title = metadata['title'][0]
        description = first_value('description')
        uploader = first_value('creator')
        raw_date = first_value('date')
        # Only hand an actual value to unified_strdate.
        upload_date = unified_strdate(raw_date) if raw_date else None

        formats = [
            {
                'format': fdata['format'],
                # NOTE(review): no separator is inserted between 'dir'
                # and the file key, so the keys of data['files'] are
                # presumably already '/'-prefixed -- confirm.
                'url': 'http://' + data['server'] + data['dir'] + fn,
                'file_size': int(fdata['size']),
            }
            for fn, fdata in data['files'].items()
            if 'Video' in fdata['format']]

        self._sort_formats(formats)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
            'thumbnail': data.get('misc', {}).get('image'),
        }