jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# -*- coding: utf-8 -*-
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	qualities,
	9	determine_ext,
	10	)
	11
	12
	13	class TeacherTubeIE(InfoExtractor):
	14	IE_NAME = 'teachertube'
	15	IE_DESC = 'teachertube.com videos'
	16
	17	_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=\|music\.php\?music_id=\|video/(?:[\da-z-]+-)?\|audio/)(?P<id>\d+)'
	18
	19	_TESTS = [{
	20	'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997',
	21	'md5': 'f9434ef992fd65936d72999951ee254c',
	22	'info_dict': {
	23	'id': '339997',
	24	'ext': 'mp4',
	25	'title': 'Measures of dispersion from a frequency table',
	26	'description': 'Measures of dispersion from a frequency table',
	27	'thumbnail': 're:http://.*\.jpg',
	28	},
	29	}, {
	30	'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064',
	31	'md5': '0d625ec6bc9bf50f70170942ad580676',
	32	'info_dict': {
	33	'id': '340064',
	34	'ext': 'mp4',
	35	'title': 'How to Make Paper Dolls _ Paper Art Projects',
	36	'description': 'Learn how to make paper dolls in this simple',
	37	'thumbnail': 're:http://.*\.jpg',
	38	},
	39	}, {
	40	'url': 'http://www.teachertube.com/music.php?music_id=8805',
	41	'md5': '01e8352006c65757caf7b961f6050e21',
	42	'info_dict': {
	43	'id': '8805',
	44	'ext': 'mp3',
	45	'title': 'PER ASPERA AD ASTRA',
	46	'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P',
	47	},
	48	}, {
	49	'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790',
	50	'md5': '9c79fbb2dd7154823996fc28d4a26998',
	51	'info_dict': {
	52	'id': '297790',
	53	'ext': 'mp4',
	54	'title': 'Intro Video - Schleicher',
	55	'description': 'Intro Video - Why to flip, how flipping will',
	56	},
	57	}]
	58
	59	def _real_extract(self, url):
	60	video_id = self._match_id(url)
	61	webpage = self._download_webpage(url, video_id)
	62
	63	title = self._html_search_meta('title', webpage, 'title', fatal=True)
	64	TITLE_SUFFIX = ' - TeacherTube'
	65	if title.endswith(TITLE_SUFFIX):
	66	title = title[:-len(TITLE_SUFFIX)].strip()
	67
	68	description = self._html_search_meta('description', webpage, 'description')
	69	if description:
	70	description = description.strip()
	71
	72	quality = qualities(['mp3', 'flv', 'mp4'])
	73
	74	media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage)
	75	media_urls.extend(re.findall(r'var\s+filePath\s=\s"([^"]+)"', webpage))
	76	media_urls.extend(re.findall(r'\'file\'\s:\s["\']([^"\']+)["\'],', webpage))
	77
	78	formats = [
	79	{
	80	'url': media_url,
	81	'quality': quality(determine_ext(media_url))
	82	} for media_url in set(media_urls)
	83	]
	84
	85	self._sort_formats(formats)
	86
	87	return {
	88	'id': video_id,
	89	'title': title,
	90	'thumbnail': self._html_search_regex(r'\'image\'\s:\s["\']([^"\']+)["\']', webpage, 'thumbnail'),
	91	'formats': formats,
	92	'description': description,
	93	}
	94
	95
	96	class TeacherTubeUserIE(InfoExtractor):
	97	IE_NAME = 'teachertube:user:collection'
	98	IE_DESC = 'teachertube.com user and collection videos'
	99
	100	_VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile\|collection)/(?P<user>[0-9a-zA-Z]+)/?'
	101
	102	_MEDIA_RE = r'''(?sx)
	103	class="?sidebar_thumb_time"?>[0-9:]+</div>
	104	\s*
	105	<a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video\|audio)/[^"]+)"
	106	'''
	107	_TEST = {
	108	'url': 'http://www.teachertube.com/user/profile/rbhagwati2',
	109	'info_dict': {
	110	'id': 'rbhagwati2'
	111	},
	112	'playlist_mincount': 179,
	113	}
	114
	115	def _real_extract(self, url):
	116	mobj = re.match(self._VALID_URL, url)
	117	user_id = mobj.group('user')
	118
	119	urls = []
	120	webpage = self._download_webpage(url, user_id)
	121	urls.extend(re.findall(self._MEDIA_RE, webpage))
	122
	123	pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1]
	124	for p in pages:
	125	more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p)
	126	webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages)))
	127	video_urls = re.findall(self._MEDIA_RE, webpage)
	128	urls.extend(video_urls)
	129
	130	entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls]
	131	return self.playlist_result(entries, user_id)