]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/googledrive.py
fix recursive error
[yt-dlp.git] / youtube_dl / extractor / googledrive.py
CommitLineData
3e5f3df1 1import re
2
984e4d48 3from .common import InfoExtractor
4from ..utils import RegexNotFoundError
5
class GoogleDriveEmbedIE(InfoExtractor):
    """Handle embedded Google Drive player / file URLs.

    Matching URLs are not extracted here; they are rewritten to the
    ``https://drive.google.com/file/d/<id>`` form and handed over to the
    extractor registered under the ``GoogleDrive`` key.
    """

    _VALID_URL = r'https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://docs.google.com/file/d/0B8KB9DRosYGKMXNoeWxqa3JYclE/preview',
        'info_dict': {
            'id': '0B8KB9DRosYGKMXNoeWxqa3JYclE',
            'ext': 'mp4',
            'title': 'Jimmy Fallon Sings Since You\'ve Been Gone.wmv',
        }
    }

    @staticmethod
    def _extract_url(webpage):
        """Return the Drive file URL of the first matching iframe in *webpage*.

        Returns None when the page contains no matching ``<iframe src="...">``.
        """
        iframe_match = re.search(
            r'<iframe src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
            webpage)
        if not iframe_match:
            return None
        return 'https://drive.google.com/file/d/%s' % iframe_match.group('id')

    def _real_extract(self, url):
        """Return a ``url``-type result that defers to the GoogleDrive extractor."""
        file_id = self._match_id(url)
        drive_url = 'https://drive.google.com/file/d/%s' % file_id
        return {
            '_type': 'url',
            'ie_key': 'GoogleDrive',
            'url': drive_url
        }
32
class GoogleDriveIE(InfoExtractor):
    """Extract video formats from a Google Drive / Google Docs file page.

    The file page is downloaded and the ``title``, ``fmt_stream_map``,
    ``fmt_list`` and ``length_seconds`` fields are read from it with regular
    expressions.
    """

    _VALID_URL = r'https?://(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)(?P<id>[a-zA-Z0-9_-]{28})'
    _TEST = {
        'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
        'info_dict': {
            'id': '0ByeS4oOUV-49Zzh4R1J6R09zazQ',
            'ext': 'mp4',
            'title': 'Big Buck Bunny.mp4',
        }
    }
    # Container extension for each format id that may appear in fmt_stream_map.
    _formats = {
        '5': {'ext': 'flv'},
        '6': {'ext': 'flv'},
        '13': {'ext': '3gp'},
        '17': {'ext': '3gp'},
        '18': {'ext': 'mp4'},
        '22': {'ext': 'mp4'},
        '34': {'ext': 'flv'},
        '35': {'ext': 'flv'},
        '36': {'ext': '3gp'},
        '37': {'ext': 'mp4'},
        '38': {'ext': 'mp4'},
        '43': {'ext': 'webm'},
        '44': {'ext': 'webm'},
        '45': {'ext': 'webm'},
        '46': {'ext': 'webm'},
        '59': {'ext': 'mp4'}
    }

    def _build_formats(self, fmt_stream_map, fmt_list):
        """Turn the raw ``fmt_stream_map`` / ``fmt_list`` strings into format dicts.

        ``fmt_stream_map`` is a comma-separated list of ``<format id>|<url>``
        pairs. ``fmt_list`` is a comma-separated list whose entries carry the
        resolution (``<width>x<height>``) as their second ``/``-separated
        field. The two lists are paired by position.

        Malformed or missing pieces are skipped instead of raising: a stream
        entry without a URL is dropped, and ``ext`` / ``resolution`` /
        ``width`` / ``height`` are only set when they can be determined.
        """
        list_entries = fmt_list.split(',')
        formats = []
        for index, stream in enumerate(fmt_stream_map.split(',')):
            # Split on the first '|' only so a URL containing '|' stays intact.
            fmt_id, separator, fmt_url = stream.partition('|')
            if not separator or not fmt_url:
                continue

            fmt = {
                'url': fmt_url,
                'format_id': fmt_id,
            }

            # Unknown format ids simply get no explicit extension.
            known_format = self._formats.get(fmt_id)
            if known_format is not None:
                fmt['ext'] = known_format['ext']

            # fmt_list may be shorter than fmt_stream_map; guard the lookup.
            fields = list_entries[index].split('/') if index < len(list_entries) else []
            if len(fields) > 1:
                resolution = fields[1]
                fmt['resolution'] = resolution
                width, _, height = resolution.partition('x')
                if width.isdigit() and height.isdigit():
                    fmt['width'] = int(width)
                    fmt['height'] = int(height)

            formats.append(fmt)
        return formats

    def _real_extract(self, url):
        """Download the file page for *url* and return its info dict.

        Returns None (after emitting a warning) when the page lacks the
        expected video fields; the warning text is the page's ``reason``
        field when present, otherwise ``'not a video'``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/' + video_id, video_id,
            encoding='unicode_escape')

        try:
            title = self._html_search_regex(
                r'"title"\s*,\s*"([^"]+)', webpage, 'title')
            fmt_stream_map = self._html_search_regex(
                r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt_stream_map')
            fmt_list = self._html_search_regex(
                r'"fmt_list"\s*,\s*"([^"]+)', webpage, 'fmt_list')
            # NOTE: a '"timestamp"' field was previously parsed here as well,
            # but that extraction is disabled.
            length_seconds = self._html_search_regex(
                r'"length_seconds"\s*,\s*"([^"]+)', webpage, 'length_seconds')
        except RegexNotFoundError:
            # A required field is missing: report why, then give up on this URL.
            try:
                reason = self._html_search_regex(
                    r'"reason","([^"]+)', webpage, 'reason')
            except RegexNotFoundError:
                reason = 'not a video'
            self.report_warning(reason)
            return

        formats = self._build_formats(fmt_stream_map, fmt_list)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            # Leave the duration unset rather than crash on a non-numeric value.
            'duration': int(length_seconds) if length_seconds.isdigit() else None,
            'formats': formats
        }