jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..compat import (
	5	compat_urllib_parse_urlencode,
	6	compat_urlparse,
	7	)
	8	from ..utils import (
	9	clean_html,
	10	parse_duration,
	11	str_to_int,
	12	unified_strdate,
	13	)
	14
	15
	16	class CamdemyIE(InfoExtractor):
	17	_VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
	18	_TESTS = [{
	19	# single file
	20	'url': 'http://www.camdemy.com/media/5181/',
	21	'md5': '5a5562b6a98b37873119102e052e311b',
	22	'info_dict': {
	23	'id': '5181',
	24	'ext': 'mp4',
	25	'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
	26	'thumbnail': r're:^https?://.*\.jpg$',
	27	'creator': 'ss11spring',
	28	'duration': 1591,
	29	'upload_date': '20130114',
	30	'view_count': int,
	31	}
	32	}, {
	33	# With non-empty description
	34	# webpage returns "No permission or not login"
	35	'url': 'http://www.camdemy.com/media/13885',
	36	'md5': '4576a3bb2581f86c61044822adbd1249',
	37	'info_dict': {
	38	'id': '13885',
	39	'ext': 'mp4',
	40	'title': 'EverCam + Camdemy QuickStart',
	41	'thumbnail': r're:^https?://.*\.jpg$',
	42	'description': 'md5:2a9f989c2b153a2342acee579c6e7db6',
	43	'creator': 'evercam',
	44	'duration': 318,
	45	}
	46	}, {
	47	# External source (YouTube)
	48	'url': 'http://www.camdemy.com/media/14842',
	49	'info_dict': {
	50	'id': '2vsYQzNIsJo',
	51	'ext': 'mp4',
	52	'title': 'Excel 2013 Tutorial - How to add Password Protection',
	53	'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
	54	'upload_date': '20130211',
	55	'uploader': 'Hun Kim',
	56	'uploader_id': 'hunkimtutorials',
	57	},
	58	'params': {
	59	'skip_download': True,
	60	},
	61	}]
	62
	63	def _real_extract(self, url):
	64	video_id = self._match_id(url)
	65
	66	webpage = self._download_webpage(url, video_id)
	67
	68	src_from = self._html_search_regex(
	69	r"class=['\"]srcFrom['\"][^>]>Sources?(?:\s+from)?\s:\s*<a[^>]+(?:href\|title)=(['\"])(?P<url>(?:(?!\1).)+)\1",
	70	webpage, 'external source', default=None, group='url')
	71	if src_from:
	72	return self.url_result(src_from)
	73
	74	oembed_obj = self._download_json(
	75	'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id)
	76
	77	title = oembed_obj['title']
	78	thumb_url = oembed_obj['thumbnail_url']
	79	video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
	80	file_list_doc = self._download_xml(
	81	compat_urlparse.urljoin(video_folder, 'fileList.xml'),
	82	video_id, 'Downloading filelist XML')
	83	file_name = file_list_doc.find('./video/item/fileName').text
	84	video_url = compat_urlparse.urljoin(video_folder, file_name)
	85
	86	# Some URLs return "No permission or not login" in a webpage despite being
	87	# freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885)
	88	upload_date = unified_strdate(self._search_regex(
	89	r'>published on ([^<]+)<', webpage,
	90	'upload date', default=None))
	91	view_count = str_to_int(self._search_regex(
	92	r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views',
	93	webpage, 'view count', default=None))
	94	description = self._html_search_meta(
	95	'description', webpage, default=None) or clean_html(
	96	oembed_obj.get('description'))
	97
	98	return {
	99	'id': video_id,
	100	'url': video_url,
	101	'title': title,
	102	'thumbnail': thumb_url,
	103	'description': description,
	104	'creator': oembed_obj.get('author_name'),
	105	'duration': parse_duration(oembed_obj.get('duration')),
	106	'upload_date': upload_date,
	107	'view_count': view_count,
	108	}
	109
	110
	111	class CamdemyFolderIE(InfoExtractor):
	112	_VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
	113	_TESTS = [{
	114	# links with trailing slash
	115	'url': 'http://www.camdemy.com/folder/450',
	116	'info_dict': {
	117	'id': '450',
	118	'title': '信號與系統 2012 & 2011 (Signals and Systems)',
	119	},
	120	'playlist_mincount': 145
	121	}, {
	122	# links without trailing slash
	123	# and multi-page
	124	'url': 'http://www.camdemy.com/folder/853',
	125	'info_dict': {
	126	'id': '853',
	127	'title': '科學計算 - 使用 Matlab'
	128	},
	129	'playlist_mincount': 20
	130	}, {
	131	# with displayMode parameter. For testing the codes to add parameters
	132	'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
	133	'info_dict': {
	134	'id': '853',
	135	'title': '科學計算 - 使用 Matlab'
	136	},
	137	'playlist_mincount': 20
	138	}]
	139
	140	def _real_extract(self, url):
	141	folder_id = self._match_id(url)
	142
	143	# Add displayMode=list so that all links are displayed in a single page
	144	parsed_url = list(compat_urlparse.urlparse(url))
	145	query = dict(compat_urlparse.parse_qsl(parsed_url[4]))
	146	query.update({'displayMode': 'list'})
	147	parsed_url[4] = compat_urllib_parse_urlencode(query)
	148	final_url = compat_urlparse.urlunparse(parsed_url)
	149
	150	page = self._download_webpage(final_url, folder_id)
	151	matches = re.findall(r"href='(/media/\d+/?)'", page)
	152
	153	entries = [self.url_result('http://www.camdemy.com' + media_path)
	154	for media_path in matches]
	155
	156	folder_title = self._html_search_meta('keywords', page)
	157
	158	return self.playlist_result(entries, folder_id, folder_title)