]>
Commit | Line | Data |
---|---|---|
d664de44 S |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import parse_iso8601 | |
7 | ||
8 | ||
class NYTimesIE(InfoExtractor):
    """Extractor for video pages under nytimes.com/video/."""

    # The numeric path component captured as ``id`` is used as the video id.
    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/video/(?:[^/]+/)+(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
        'md5': '18a525a510f942ada2720db5f31644c0',
        'info_dict': {
            'id': '100000002847155',
            'ext': 'mov',
            'title': 'Verbatim: What Is a Photocopier?',
            'description': 'md5:93603dada88ddbda9395632fdc5da260',
            'timestamp': 1398631707,
            'upload_date': '20140427',
            'uploader': 'Brett Weiner',
            'duration': 419,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        The video id is taken from ``url`` via ``_VALID_URL`` and the
        metadata is fetched from the ``/svc/video/api/v2/video/<id>``
        endpoint.  Only ``headline``, ``renditions`` and each rendition's
        ``url`` are treated as mandatory; every other field is optional and
        comes out as ``None`` (or is omitted) when the response lacks it,
        so a missing optional field no longer aborts the extraction.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``timestamp``, ``uploader``, ``duration``, ``formats`` and
        ``thumbnails``.

        Raises ``KeyError`` when a mandatory field is absent.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        video_data = self._download_json(
            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        # Mandatory: there is nothing sensible to return without a title.
        title = video_data['headline']

        # Optional metadata: read tolerantly instead of subscripting.
        description = video_data.get('summary')
        uploader = video_data.get('byline')

        # The raw value is divided by 1000 to obtain the reported duration
        # (presumably milliseconds -> seconds; confirm against the API).
        raw_duration = video_data.get('duration')
        duration = raw_duration / 1000.0 if raw_duration is not None else None

        # The last 8 characters are dropped before parsing, as the original
        # code did (presumably a suffix parse_iso8601 does not accept --
        # confirm against a live response).
        publication_date = video_data.get('publication_date')
        timestamp = (
            parse_iso8601(publication_date[:-8]) if publication_date else None)

        def get_file_size(file_size):
            """Normalize a rendition's ``fileSize`` entry to an int.

            Accepts either a plain int or a dict carrying the size under
            ``value``; anything else (including a missing entry) yields 0.
            """
            if isinstance(file_size, int):
                return file_size
            elif isinstance(file_size, dict):
                return int(file_size.get('value', 0))
            else:
                return 0

        formats = [
            {
                # A rendition without a URL is unusable, so this stays strict.
                'url': video['url'],
                'format_id': video.get('type'),
                'vcodec': video.get('video_codec'),
                'width': video.get('width'),
                'height': video.get('height'),
                'filesize': get_file_size(video.get('fileSize')),
            } for video in video_data['renditions']
        ]
        self._sort_formats(formats)

        thumbnails = []
        for image in video_data.get('images') or []:
            image_url = image.get('url')
            if not image_url:
                # An image entry without a URL carries no usable thumbnail.
                continue
            thumbnail = {'url': 'http://www.nytimes.com/%s' % image_url}
            width = image.get('width')
            height = image.get('height')
            # Only advertise a resolution when both dimensions are known.
            if width is not None and height is not None:
                thumbnail['resolution'] = '%dx%d' % (width, height)
            thumbnails.append(thumbnail)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
        }