]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/amara.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / yt_dlp / extractor / amara.py
CommitLineData
a0566bbf 1# coding: utf-8
2from __future__ import unicode_literals
3
4from .common import InfoExtractor
5from .youtube import YoutubeIE
6from .vimeo import VimeoIE
7from ..utils import (
8 int_or_none,
9 parse_iso8601,
10 update_url_query,
11)
12
13
class AmaraIE(InfoExtractor):
    """Extractor for video pages hosted on amara.org.

    Metadata and subtitle links are read from the Amara JSON API. The media
    URL is the first entry of the API's ``all_urls`` list; when that URL is
    one that YoutubeIE or VimeoIE declares suitable, the result is flagged as
    ``url_transparent`` with the matching extractor key.
    """

    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Media hosted on YouTube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Media hosted on Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Media served from a direct file link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the Amara video referenced by ``url``.

        Raises KeyError/IndexError if the API response lacks ``title`` or has
        no entry in ``all_urls``; every other metadata field is optional.
        """
        video_id = self._match_id(url)
        api_endpoint = 'https://amara.org/api/videos/%s/' % video_id
        meta = self._download_json(
            api_endpoint, video_id, query={'format': 'json'})

        # Mandatory fields: looked up eagerly so a malformed response fails
        # before any further processing.
        title = meta['title']
        video_url = meta['all_urls'][0]

        # Collect one entry per subtitle format for every published language
        # that exposes a subtitles URI.
        subtitles = {}
        for track in meta.get('languages') or ():
            track_uri = track.get('subtitles_uri')
            if not track_uri or not track.get('published'):
                continue
            # Entries without a language code are filed under 'en'.
            lang_code = track.get('code') or 'en'
            subtitles.setdefault(lang_code, []).extend(
                {
                    'ext': fmt,
                    'url': update_url_query(track_uri, {'format': fmt}),
                }
                for fmt in ('json', 'srt', 'vtt'))

        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # Pick the first known hosting extractor (YouTube checked before
        # Vimeo) that accepts the media URL, if any.
        hosting_ie = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if hosting_ie is not None:
            info.update({
                '_type': 'url_transparent',
                'ie_key': hosting_ie.ie_key(),
            })

        return info
100 })
101 break
102
103 return info