jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import re
	4
	5	from .common import InfoExtractor
	6	from ..compat import (
	7	compat_urllib_request,
	8	compat_urllib_parse,
	9	)
	10	from ..utils import (
	11	ExtractorError,
	12	)
	13
	14
	15	class MooshareIE(InfoExtractor):
	16	IE_NAME = 'mooshare'
	17	IE_DESC = 'Mooshare.biz'
	18	_VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
	19
	20	_TESTS = [
	21	{
	22	'url': 'http://mooshare.biz/8dqtk4bjbp8g',
	23	'md5': '4e14f9562928aecd2e42c6f341c8feba',
	24	'info_dict': {
	25	'id': '8dqtk4bjbp8g',
	26	'ext': 'mp4',
	27	'title': 'Comedy Football 2011 - (part 1-2)',
	28	'duration': 893,
	29	},
	30	},
	31	{
	32	'url': 'http://mooshare.biz/aipjtoc4g95j',
	33	'info_dict': {
	34	'id': 'aipjtoc4g95j',
	35	'ext': 'mp4',
	36	'title': 'Orange Caramel Dashing Through the Snow',
	37	'duration': 212,
	38	},
	39	'params': {
	40	# rtmp download
	41	'skip_download': True,
	42	}
	43	}
	44	]
	45
	46	def _real_extract(self, url):
	47	video_id = self._match_id(url)
	48	page = self._download_webpage(url, video_id, 'Downloading page')
	49
	50	if re.search(r'>Video Not Found or Deleted<', page) is not None:
	51	raise ExtractorError('Video %s does not exist' % video_id, expected=True)
	52
	53	hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash')
	54	title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title')
	55
	56	download_form = {
	57	'op': 'download1',
	58	'id': video_id,
	59	'hash': hash_key,
	60	}
	61
	62	request = compat_urllib_request.Request(
	63	'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form))
	64	request.add_header('Content-Type', 'application/x-www-form-urlencoded')
	65
	66	self._sleep(5, video_id)
	67
	68	video_page = self._download_webpage(request, video_id, 'Downloading video page')
	69
	70	thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False)
	71	duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False)
	72	duration = int(duration_str) if duration_str is not None else None
	73
	74	formats = []
	75
	76	# SD video
	77	mobj = re.search(r'(?m)file:\s"(?P<url>[^"]+)",\sprovider:', video_page)
	78	if mobj is not None:
	79	formats.append({
	80	'url': mobj.group('url'),
	81	'format_id': 'sd',
	82	'format': 'SD',
	83	})
	84
	85	# HD video
	86	mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page)
	87	if mobj is not None:
	88	formats.append({
	89	'url': mobj.group('url'),
	90	'format_id': 'hd',
	91	'format': 'HD',
	92	})
	93
	94	# rtmp video
	95	mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page)
	96	if mobj is not None:
	97	formats.append({
	98	'url': mobj.group('rtmpurl'),
	99	'play_path': mobj.group('playpath'),
	100	'rtmp_live': False,
	101	'ext': 'mp4',
	102	'format_id': 'rtmp',
	103	'format': 'HD',
	104	})
	105
	106	return {
	107	'id': video_id,
	108	'title': title,
	109	'thumbnail': thumbnail,
	110	'duration': duration,
	111	'formats': formats,
	112	}