]>
Commit | Line | Data |
---|---|---|
1 | # -*- coding: utf-8 -*- | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | qualities, | |
9 | determine_ext, | |
10 | ) | |
11 | ||
12 | ||
class TeacherTubeIE(InfoExtractor):
    """Extractor for single teachertube.com video and audio pages."""

    IE_NAME = 'teachertube'
    IE_DESC = 'teachertube.com videos'

    # Accepts the legacy PHP endpoints (viewVideo.php / music.php) as well as
    # the /video/<optional-slug->ID and /audio/ID forms; the numeric media id
    # is captured in the named group "id".
    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
        'md5': 'f9434ef992fd65936d72999951ee254c',
        'info_dict': {
            'id': '339997',
            'ext': 'mp4',
            'title': 'Measures of dispersion from a frequency table',
            'description': 'Measures of dispersion from a frequency table',
            # Raw string: "\." must reach the regex engine as-is instead of
            # relying on an invalid string escape being passed through.
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
        'md5': '0d625ec6bc9bf50f70170942ad580676',
        'info_dict': {
            'id': '340064',
            'ext': 'mp4',
            'title': 'How to Make Paper Dolls _ Paper Art Projects',
            'description': 'Learn how to make paper dolls in this simple',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.teachertube.com/music.php?music_id=8805',
        'md5': '01e8352006c65757caf7b961f6050e21',
        'info_dict': {
            'id': '8805',
            'ext': 'mp3',
            'title': 'PER ASPERA AD ASTRA',
            'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
        },
    }, {
        'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
        'md5': '9c79fbb2dd7154823996fc28d4a26998',
        'info_dict': {
            'id': '297790',
            'ext': 'mp4',
            'title': 'Intro Video - Schleicher',
            'description': 'Intro Video - Why to flip, how flipping will',
        },
    }]

    def _real_extract(self, url):
        """Download the media page for *url* and build its info dict.

        The title is mandatory (extraction fails without it); description
        and thumbnail are optional and are None when the page lacks them.
        Returns a dict with 'id', 'title', 'thumbnail', 'formats' and
        'description'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('title', webpage, 'title', fatal=True)
        # The meta title carries a site-wide suffix that is not part of the
        # actual media title.
        TITLE_SUFFIX = ' - TeacherTube'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)].strip()

        description = self._html_search_meta('description', webpage, 'description')
        if description:
            description = description.strip()

        # Ranks a format by its file extension.
        # NOTE(review): presumably entries later in the list are preferred
        # (mp4 over flv over mp3) -- confirm against utils.qualities.
        quality = qualities(['mp3', 'flv', 'mp4'])

        # The media URL can be embedded in three different ways; collect all.
        media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
        media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
        media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))

        # set() drops URLs that were matched by more than one pattern.
        formats = [
            {
                'url': media_url,
                'quality': quality(determine_ext(media_url))
            } for media_url in set(media_urls)
        ]

        self._sort_formats(formats)

        # The thumbnail is optional metadata: a page without an 'image' entry
        # must not abort an otherwise successful extraction.
        thumbnail = self._html_search_regex(
            r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail',
            fatal=False)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'description': description,
        }
94 | ||
95 | ||
class TeacherTubeUserIE(InfoExtractor):
    """Playlist extractor for teachertube.com user profile and collection pages."""

    IE_NAME = 'teachertube:user:collection'
    IE_DESC = 'teachertube.com user and collection videos'

    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'

    # Verbose-mode pattern: captures the video/audio page link that directly
    # follows a sidebar thumbnail duration marker.
    _MEDIA_RE = r'''(?sx)
        class="?sidebar_thumb_time"?>[0-9:]+</div>
        \s*
        <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)"
    '''
    _TEST = {
        'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
        'info_dict': {
            'id': 'rbhagwati2'
        },
        'playlist_mincount': 179,
    }

    def _real_extract(self, url):
        """Collect every media link listed for the user/collection in *url*.

        Scans the landing page, then each additional AJAX listing page it
        references, and returns a playlist whose id is the user name taken
        from the URL.
        """
        user_id = re.match(self._VALID_URL, url).group('user')

        landing_page = self._download_webpage(url, user_id)
        media_links = re.findall(self._MEDIA_RE, landing_page)

        # Page numbers referenced by the landing page; the final match is
        # dropped before iterating.
        page_numbers = re.findall(
            r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id,
            landing_page)[:-1]
        page_total = len(page_numbers)

        for page_no in page_numbers:
            listing_url = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, page_no)
            listing = self._download_webpage(
                listing_url, user_id,
                'Downloading page %s/%s' % (page_no, page_total))
            media_links += re.findall(self._MEDIA_RE, listing)

        return self.playlist_result(
            [self.url_result(link, 'TeacherTube') for link in media_links],
            user_id)