]>
Commit | Line | Data |
---|---|---|
5b251628 | 1 | from __future__ import unicode_literals |
2 | ||
3e5f3df1 | 3 | import re |
4 | ||
984e4d48 | 5 | from .common import InfoExtractor |
8e92d21e | 6 | from ..utils import ( |
8e92d21e | 7 | ExtractorError, |
5b251628 | 8 | int_or_none, |
e4e50f60 | 9 | lowercase_escape, |
8e92d21e | 10 | ) |
984e4d48 | 11 | |
5b251628 | 12 | |
class GoogleDriveIE(InfoExtractor):
    """Information extractor for videos hosted on Google Drive / Google Docs.

    Handles ``file/d/<id>`` and ``uc?...id=<id>`` links on docs.google.com and
    drive.google.com, plus ``video.google.com/get_player?...docid=<id>``.
    """

    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
    _TESTS = [{
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
        'md5': 'd109872761f7e7ecf353fa108c0dbe1e',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
            'duration': 45,
        }
    }, {
        # video id is longer than 28 characters
        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
        'only_matching': True,
    }]
    # Container extension for each format id that appears in "fmt_stream_map".
    _FORMATS_EXT = {
        '5': 'flv',
        '6': 'flv',
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '34': 'flv',
        '35': 'flv',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',
        '59': 'mp4',
    }

    @staticmethod
    def _extract_url(webpage):
        """Return a canonical Drive URL for a player embedded in *webpage*.

        Searches for an ``<iframe>`` whose ``src`` points at a Drive/Docs file
        or at the video.google.com player. Returns ``None`` when no such
        iframe is found.
        """
        mobj = re.search(
            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
            webpage)
        if mobj:
            return 'https://drive.google.com/file/d/%s' % mobj.group('id')
        return None

    @staticmethod
    def _parse_resolutions(fmt_list):
        """Map format id -> ``(resolution, width, height)`` for *fmt_list*.

        Entries are expected to look like ``<format id>/<width>x<height>/...``
        (NOTE(review): layout assumed from the fields the previous positional
        code read - confirm against a live page). Entries that do not match
        are skipped instead of aborting the extraction.
        """
        resolutions = {}
        for fmt in fmt_list:
            mobj = re.match(
                r'(?P<format_id>\d+)/(?P<resolution>(?P<width>\d+)[xX](?P<height>\d+))',
                fmt)
            if mobj:
                resolutions[mobj.group('format_id')] = (
                    mobj.group('resolution'),
                    int(mobj.group('width')),
                    int(mobj.group('height')))
        return resolutions

    def _real_extract(self, url):
        """Download the Docs file page for *url* and build the info dict.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``duration`` and
        ``formats``. Raises ExtractorError carrying the page's "reason" value
        when one is present.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/%s' % video_id, video_id)

        # A "reason" value on the page is raised as the extraction error.
        reason = self._search_regex(
            r'"reason"\s*,\s*"([^"]+)', webpage, 'reason', default=None)
        if reason:
            raise ExtractorError(reason)

        title = self._search_regex(r'"title"\s*,\s*"([^"]+)', webpage, 'title')
        duration = int_or_none(self._search_regex(
            r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length seconds', default=None))
        fmt_stream_map = self._search_regex(
            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map').split(',')
        # Resolution data is optional metadata, so a missing "fmt_list" must
        # not prevent the stream URLs from being returned.
        fmt_list = self._search_regex(
            r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list', default='').split(',')

        # Look resolutions up by format id rather than by list position, so a
        # difference in order or length between the two lists cannot attach
        # the wrong resolution to a stream or silently drop streams.
        resolutions = self._parse_resolutions(fmt_list)

        formats = []
        for fmt_stream in fmt_stream_map:
            # Each entry is "<format id>|<url>"; split on the first '|' only
            # and skip malformed entries instead of raising on unpacking.
            fmt_id, sep, fmt_url = fmt_stream.partition('|')
            if not sep or not fmt_url:
                continue
            fmt_info = {
                'url': lowercase_escape(fmt_url),
                'format_id': fmt_id,
            }
            # Unknown format ids no longer raise KeyError; 'ext' is simply
            # omitted (presumably derived downstream from the URL - verify).
            ext = self._FORMATS_EXT.get(fmt_id)
            if ext:
                fmt_info['ext'] = ext
            if fmt_id in resolutions:
                resolution, width, height = resolutions[fmt_id]
                fmt_info.update({
                    'resolution': resolution,
                    'width': width,
                    'height': height,
                })
            formats.append(fmt_info)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': self._og_search_thumbnail(webpage, default=None),
            'duration': duration,
            'formats': formats,
        }