]>
Commit | Line | Data |
---|---|---|
3e5f3df1 | 1 | import re |
2 | ||
984e4d48 | 3 | from .common import InfoExtractor |
4 | from ..utils import RegexNotFoundError | |
5 | ||
class GoogleDriveEmbedIE(InfoExtractor):
    """Redirect Google Drive/Docs embed and player URLs to ``GoogleDriveIE``.

    Handles ``video.google.com/get_player?...docid=<id>`` URLs as well as
    ``docs.google.com``/``drive.google.com`` ``/file/d/<id>`` URLs, and hands
    the canonical ``https://drive.google.com/file/d/<id>`` URL over to the
    extractor registered under the ``GoogleDrive`` key.
    """

    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
        'info_dict': {
            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
            'ext': 'mp4',
            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Find an embedded Google Drive player in *webpage*.

        Returns the canonical ``https://drive.google.com/file/d/<id>`` URL of
        the first matching ``<iframe>``, or ``None`` when the page contains
        no such embed.
        """
        # The ``src`` attribute is not necessarily the first attribute of the
        # tag (e.g. ``<iframe width="640" src="...">``), so allow any other
        # attributes between ``<iframe`` and ``src``.
        mobj = re.search(
            r'<iframe\b[^>]*\ssrc="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
            webpage)
        if mobj:
            return 'https://drive.google.com/file/d/%s' % mobj.group('id')
        return None

    def _real_extract(self, url):
        """Return a ``url``-type result pointing at the canonical file URL."""
        video_id = self._match_id(url)
        return {
            '_type': 'url',
            'ie_key': 'GoogleDrive',
            'url': 'https://drive.google.com/file/d/%s' % video_id
        }
32 | ||
class GoogleDriveIE(InfoExtractor):
    """Extract video streams from a Google Drive / Google Docs file page.

    The file page is downloaded and the ``title``, ``fmt_stream_map``,
    ``fmt_list`` and ``length_seconds`` entries embedded in it are parsed
    into a list of formats.
    """

    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
        }
    }
    # Container extension for each stream format id this extractor knows.
    _formats = {
        '5': {'ext': 'flv'},
        '6': {'ext': 'flv'},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp'},
        '18': {'ext': 'mp4'},
        '22': {'ext': 'mp4'},
        '34': {'ext': 'flv'},
        '35': {'ext': 'flv'},
        '36': {'ext': '3gp'},
        '37': {'ext': 'mp4'},
        '38': {'ext': 'mp4'},
        '43': {'ext': 'webm'},
        '44': {'ext': 'webm'},
        '45': {'ext': 'webm'},
        '46': {'ext': 'webm'},
        '59': {'ext': 'mp4'}
    }

    def _real_extract(self, url):
        """Build the info dict for the Drive file referenced by *url*.

        Returns a dict with ``id``, ``title``, ``duration`` and ``formats``.
        If any of the required page entries is missing, a warning is
        reported (the page's own ``reason`` text when present, otherwise
        ``'not a video'``) and ``None`` is returned.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/' + video_id, video_id,
            encoding='unicode_escape')

        try:
            title = self._html_search_regex(
                r'"title"\s*,\s*"([^"]+)', webpage, 'title')
            fmt_stream_map = self._html_search_regex(
                r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt_stream_map')
            fmt_list = self._html_search_regex(
                r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list')
            length_seconds = self._html_search_regex(
                r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length_seconds')
        except RegexNotFoundError:
            # Prefer the explanation embedded in the page; fall back to a
            # generic message when there is none.
            try:
                reason = self._html_search_regex(
                    r'"reason","([^"]+)', webpage, 'reason')
            except RegexNotFoundError:
                reason = 'not a video'
            self.report_warning(reason)
            return

        formats = []
        # Entries of fmt_stream_map and fmt_list are paired by position.
        # zip() stops at the shorter list instead of raising IndexError when
        # the two disagree in length.
        for stream_entry, list_entry in zip(
                fmt_stream_map.split(','), fmt_list.split(',')):
            # Split on the first '|' only, so a '|' inside the stream URL
            # does not break the unpacking.
            fmt_id, fmt_url = stream_entry.split('|', 1)
            resolution = list_entry.split('/')[1]
            width, height = resolution.split('x')
            formats.append({
                'url': fmt_url,
                'format_id': fmt_id,
                'resolution': resolution,
                'width': int(width),
                'height': int(height),
                # A format id missing from the table yields None rather
                # than aborting the whole extraction with a KeyError.
                'ext': self._formats.get(fmt_id, {}).get('ext'),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'duration': int(length_seconds),
            'formats': formats
        }