]>
Commit | Line | Data |
---|---|---|
5b251628 | 1 | from __future__ import unicode_literals |
2 | ||
3e5f3df1 | 3 | import re |
4 | ||
984e4d48 | 5 | from .common import InfoExtractor |
8e92d21e | 6 | from ..utils import ( |
8e92d21e | 7 | ExtractorError, |
5b251628 | 8 | int_or_none, |
8e92d21e | 9 | ) |
984e4d48 | 10 | |
5b251628 | 11 | |
class GoogleDriveIE(InfoExtractor):
    """Extractor for videos hosted on Google Drive / Google Docs.

    Handles ``docs.google.com`` / ``drive.google.com`` file URLs
    (``/file/d/<id>`` and ``/uc?...id=<id>``) as well as legacy
    ``video.google.com/get_player?...docid=<id>`` URLs, as described by
    ``_VALID_URL``.
    """

    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
    _TESTS = [{
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
        'md5': '881f7700aec4f538571fa1e0eed4a7b6',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'duration': 46,
        }
    }, {
        # video id is longer than 28 characters
        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
        'only_matching': True,
    }]
    # Maps the format id found in "fmt_stream_map" to its container extension.
    _FORMATS_EXT = {
        '5': 'flv',
        '6': 'flv',
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '34': 'flv',
        '35': 'flv',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',
        '59': 'mp4',
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the Drive URL of the first player iframe embedded in *webpage*.

        Looks for an ``<iframe>`` whose ``src`` points at a Google Drive /
        Docs file or the legacy video.google.com player and rewrites it to
        ``https://drive.google.com/file/d/<id>``. Returns ``None`` when no
        such iframe is present.
        """
        mobj = re.search(
            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
            webpage)
        if mobj:
            return 'https://drive.google.com/file/d/%s' % mobj.group('id')

    @staticmethod
    def _parse_resolutions(fmt_list):
        """Map format id -> (resolution, width, height) from "fmt_list" entries.

        Each entry is expected to look like ``<format id>/<W>x<H>/...``
        (assumes the leading field is the same id used in "fmt_stream_map").
        Entries that do not match this shape are ignored instead of aborting
        the extraction.
        """
        resolutions = {}
        for fmt in fmt_list:
            mobj = re.match(
                r'(?P<id>[^/]+)/(?P<resolution>(?P<width>\d+)x(?P<height>\d+))', fmt)
            if mobj:
                resolutions[mobj.group('id')] = (
                    mobj.group('resolution'),
                    int_or_none(mobj.group('width')),
                    int_or_none(mobj.group('height')))
        return resolutions

    def _real_extract(self, url):
        """Download the file page for *url* and build the info dict.

        Raises ExtractorError when the page carries a "reason" entry (the
        service's own explanation of why the video cannot be played).
        Returns a dict with ``id``, ``title``, ``thumbnail``, ``duration``
        and ``formats``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/%s' % video_id, video_id, encoding='unicode_escape')

        reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
        if reason:
            # The message comes from the service itself, so report it as an
            # expected failure rather than as an extractor bug.
            raise ExtractorError(reason, expected=True)

        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
        duration = int_or_none(self._search_regex(
            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
        fmt_stream_map = self._search_regex(
            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
        fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list').split(',')

        # Look resolutions up by format id rather than by list position, so a
        # reordered or shorter "fmt_list" cannot mislabel or drop a stream.
        resolutions = self._parse_resolutions(fmt_list)

        formats = []
        for fmt_stream in fmt_stream_map:
            # Split on the first '|' only: the URL part may itself contain '|'.
            fmt_id, separator, fmt_url = fmt_stream.partition('|')
            if not separator or not fmt_url:
                # Malformed entry without a URL; nothing to download.
                continue
            fmt_info = {
                'url': fmt_url,
                'format_id': fmt_id,
            }
            ext = self._FORMATS_EXT.get(fmt_id)
            if ext:
                # Unknown format ids keep the stream but leave 'ext' unset
                # instead of failing the whole extraction with a KeyError.
                fmt_info['ext'] = ext
            resolution_info = resolutions.get(fmt_id)
            if resolution_info:
                resolution, width, height = resolution_info
                fmt_info.update({
                    'resolution': resolution,
                    'width': width,
                    'height': height,
                })
            formats.append(fmt_info)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'formats': formats,
        }