]>
Commit | Line | Data |
---|---|---|
b4e74474 | 1 | # -*- coding: utf-8 -*- |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
6 | from .common import InfoExtractor | |
f0a6c3d2 S |
7 | from ..utils import ( |
8 | qualities, | |
9 | determine_ext, | |
10 | ) | |
b4e74474 | 11 | |
12 | ||
class TeacherTubeIE(InfoExtractor):
    """Extractor for a single teachertube.com video or audio page.

    Handles the URL shapes listed in ``_VALID_URL``: the legacy
    ``viewVideo.php?video_id=`` / ``music.php?music_id=`` pages and the
    ``/video/`` / ``/audio/`` paths, each followed by a numeric id.
    """
    IE_NAME = 'teachertube'
    IE_DESC = 'teachertube.com videos'

    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/|audio/)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
        'md5': 'f9434ef992fd65936d72999951ee254c',
        'info_dict': {
            'id': '339997',
            'ext': 'mp4',
            'title': 'Measures of dispersion from a frequency table',
            'description': 'Measures of dispersion from a frequency table',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
        'md5': '0d625ec6bc9bf50f70170942ad580676',
        'info_dict': {
            'id': '340064',
            'ext': 'mp4',
            'title': 'How to Make Paper Dolls _ Paper Art Projects',
            'description': 'Learn how to make paper dolls in this simple',
            'thumbnail': r're:http://.*\.jpg',
        },
    }, {
        'url': 'http://www.teachertube.com/music.php?music_id=8805',
        'md5': '01e8352006c65757caf7b961f6050e21',
        'info_dict': {
            'id': '8805',
            'ext': 'mp3',
            'title': 'PER ASPERA AD ASTRA',
            'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
        },
    }]

    def _real_extract(self, url):
        """Download the media page and build the info dict for it.

        Returns a dict with ``id``, ``title``, ``thumbnail``, ``formats``
        and ``description`` keys. ``description`` and ``thumbnail`` may be
        None when the page does not provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta('title', webpage, 'title')
        # The meta title carries a site suffix; strip it when present.
        TITLE_SUFFIX = ' - TeacherTube'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)].strip()

        description = self._html_search_meta('description', webpage, 'description')
        if description:
            description = description.strip()

        # Ranks a format by its file extension; presumably later entries in
        # the list are preferred -- confirm against utils.qualities.
        quality = qualities(['mp3', 'flv', 'mp4'])

        # Media URLs can appear in three different places in the page.
        media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
        media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage))
        media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage))

        # De-duplicate while keeping page order. Going through set() made the
        # relative order of equally-ranked formats depend on hash ordering,
        # which can differ from run to run.
        seen_urls = set()
        formats = []
        for media_url in media_urls:
            if media_url in seen_urls:
                continue
            seen_urls.add(media_url)
            formats.append({
                'url': media_url,
                'quality': quality(determine_ext(media_url)),
            })

        self._sort_formats(formats)

        # The thumbnail is optional metadata: do not abort the whole
        # extraction when the page has no 'image' entry.
        thumbnail = self._html_search_regex(
            r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail',
            fatal=False)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
            'description': description,
        }
87 | ||
88 | ||
31a196d7 PP |
class TeacherTubeUserIE(InfoExtractor):
    """Playlist extractor for teachertube.com user profiles and collections.

    Collects the video/audio links from the landing page and from the
    paginated ``ajax-user/user-videos`` listings, and hands each one to the
    'TeacherTube' extractor.
    """
    IE_NAME = 'teachertube:user:collection'
    IE_DESC = 'teachertube.com user and collection videos'

    _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?'

    def _real_extract(self, url):
        """Return a playlist result whose entries are the linked media pages."""
        user_id = re.match(self._VALID_URL, url).group('user')

        # Same link pattern is applied to the landing page and to every
        # additional listing page.
        media_link_re = r'"sidebar_thumb_time">[0-9:]+</div>\s+<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">'

        landing_page = self._download_webpage(url, user_id)
        media_links = re.findall(media_link_re, landing_page)

        # The first and last pagination matches are dropped; presumably they
        # duplicate the current page and a trailing navigation link -- confirm
        # against the site markup.
        page_numbers = re.findall(
            r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id,
            landing_page)[1:-1]
        page_total = len(page_numbers) + 1

        for page_number in page_numbers:
            listing_url = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, page_number)
            listing = self._download_webpage(
                listing_url, user_id,
                'Downloading page %s/%s' % (page_number, page_total))
            media_links.extend(re.findall(media_link_re, listing))

        entries = [
            self.url_result(media_link, 'TeacherTube')
            for media_link in media_links
        ]
        return self.playlist_result(entries, user_id)