]>
Commit | Line | Data |
---|---|---|
8b8c1093 SW |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import os | |
5 | import re | |
6 | ||
7 | from .common import InfoExtractor | |
8 | ||
9 | from ..utils import ( | |
10 | unified_strdate | |
11 | ) | |
12 | ||
13 | ||
class VideoEsriIE(InfoExtractor):
    """Extractor for watch pages on video.esri.com.

    The watch page embeds an ``evPlayerSettings`` JavaScript object; the
    thumbnail path, the HTTP base URL and the mp4 file paths are all pulled
    out of that object with regular expressions.
    """

    _VALID_URL = r'https?://video\.esri\.com/watch/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://video.esri.com/watch/4228',
        'md5': '170b4d513c2466ed483c150a48384133',
        'info_dict': {
            'id': '4228',
            'ext': 'mp4',
            'title': 'AppStudio for ArcGIS',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20150310',
        }
    }

    # Size labels found in the mp4 file names, listed in the order the
    # formats are returned. These are the numbers used internally, but they
    # really map to other resolutions, e.g. 960 is 720p.
    _HEIGHT_LABELS = (480, 720, 960)

    def _real_extract(self, url):
        """Build the info dict for the watch page at *url*.

        Returns a dict with ``id``, ``title``, ``upload_date`` and
        ``formats``; ``thumbnail`` is added only when the player settings
        contain a thumbnail path.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title')

        # The upload date is optional metadata, so a page without the
        # last-modified meta tag must not abort the whole extraction.
        upload_date_raw = self._search_regex(
            r'http-equiv="last-modified" content="([^"]+)"',
            webpage, 'upload date', default=None)
        upload_date = (
            unified_strdate(upload_date_raw) if upload_date_raw else None)

        settings_info = self._search_regex(
            r'evPlayerSettings = {(.*?);\s*$',
            webpage, 'settings info', flags=re.MULTILINE | re.DOTALL)

        # thumbnail includes '_x' for large, also has {_m,_t,_s} or
        # without size suffix returns full image
        thumbnail_path = self._search_regex(
            r'image\': \'(/thumbs[^\']*)\'',
            settings_info, 'thumbnail', default=None)

        # find possible http servers of the mp4 files (also has rtsp);
        # the leading 'h' in the capture keeps only the http(s) entry
        base_url = self._search_regex(
            r'netstreambasepath\':\s\'(h[^\']*)\'',
            settings_info, 'base url')

        result = {
            'id': video_id,
            'title': title,
            'upload_date': upload_date,
            'formats': self._extract_formats(settings_info, base_url),
        }

        if thumbnail_path:
            # thumbnail_path already starts with '/', so plain
            # concatenation avoids a doubled slash in the URL.
            result['thumbnail'] = 'http://video.esri.com' + thumbnail_path

        return result

    def _extract_formats(self, settings_info, base_url):
        """Return the mp4 formats listed in *settings_info*.

        Only files whose name carries one of ``_HEIGHT_LABELS`` as its
        second underscore-separated component are kept; the result follows
        the order of ``_HEIGHT_LABELS``.
        """
        videos_by_label = {}
        # note that this misses the (exceedingly rare) webm files
        for video_path in re.findall(r'mp4:([^\']+)\'', settings_info):
            filename, ext = os.path.splitext(video_path)
            try:
                height_label = int(filename.split('_')[1])
            except (IndexError, ValueError):
                # File name does not follow the <name>_<label> scheme.
                continue
            videos_by_label[height_label] = {
                'url': base_url + video_path,
                'ext': ext[1:],
                'protocol': 'http',  # http-only supported currently
            }

        return [
            videos_by_label[label]
            for label in self._HEIGHT_LABELS
            if label in videos_by_label
        ]