jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import compat_urllib_parse
	8	from ..utils import (
	9	ExtractorError,
	10	encode_dict,
	11	int_or_none,
	12	sanitized_Request,
	13	)
	14
	15
	16	class XFileShareIE(InfoExtractor):
	17	IE_DESC = 'XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me'
	18	_VALID_URL = r'''(?x)
	19	https?://(?P<host>(?:www\.)?
	20	(?:daclips\.in\|gorillavid\.in\|movpod\.in\|fastvideo\.in\|realvid\.net\|filehoot\.com\|vidto\.me))/
	21	(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?
	22	'''
	23
	24	_FILE_NOT_FOUND_REGEX = r'>(?:404 - )?File Not Found<'
	25
	26	_TESTS = [{
	27	'url': 'http://gorillavid.in/06y9juieqpmi',
	28	'md5': '5ae4a3580620380619678ee4875893ba',
	29	'info_dict': {
	30	'id': '06y9juieqpmi',
	31	'ext': 'flv',
	32	'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
	33	'thumbnail': 're:http://.*\.jpg',
	34	},
	35	}, {
	36	'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
	37	'only_matching': True,
	38	}, {
	39	'url': 'http://daclips.in/3rso4kdn6f9m',
	40	'md5': '1ad8fd39bb976eeb66004d3a4895f106',
	41	'info_dict': {
	42	'id': '3rso4kdn6f9m',
	43	'ext': 'mp4',
	44	'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
	45	'thumbnail': 're:http://.*\.jpg',
	46	}
	47	}, {
	48	# video with countdown timeout
	49	'url': 'http://fastvideo.in/1qmdn1lmsmbw',
	50	'md5': '8b87ec3f6564a3108a0e8e66594842ba',
	51	'info_dict': {
	52	'id': '1qmdn1lmsmbw',
	53	'ext': 'mp4',
	54	'title': 'Man of Steel - Trailer',
	55	'thumbnail': 're:http://.*\.jpg',
	56	},
	57	}, {
	58	'url': 'http://realvid.net/ctn2y6p2eviw',
	59	'md5': 'b2166d2cf192efd6b6d764c18fd3710e',
	60	'info_dict': {
	61	'id': 'ctn2y6p2eviw',
	62	'ext': 'flv',
	63	'title': 'rdx 1955',
	64	'thumbnail': 're:http://.*\.jpg',
	65	},
	66	}, {
	67	'url': 'http://movpod.in/0wguyyxi1yca',
	68	'only_matching': True,
	69	}, {
	70	'url': 'http://filehoot.com/3ivfabn7573c.html',
	71	'info_dict': {
	72	'id': '3ivfabn7573c',
	73	'ext': 'mp4',
	74	'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
	75	'thumbnail': 're:http://.*\.jpg',
	76	}
	77	}, {
	78	'url': 'http://vidto.me/ku5glz52nqe1.html',
	79	'info_dict': {
	80	'id': 'ku5glz52nqe1',
	81	'ext': 'mp4',
	82	'title': 'test'
	83	}
	84	}]
	85
	86	def _real_extract(self, url):
	87	mobj = re.match(self._VALID_URL, url)
	88	video_id = mobj.group('id')
	89
	90	url = 'http://%s/%s' % (mobj.group('host'), video_id)
	91	webpage = self._download_webpage(url, video_id)
	92
	93	if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None:
	94	raise ExtractorError('Video %s does not exist' % video_id, expected=True)
	95
	96	fields = self._hidden_inputs(webpage)
	97
	98	if fields['op'] == 'download1':
	99	countdown = int_or_none(self._search_regex(
	100	r'<span id="countdown_str">(?:[Ww]ait)?\s<span id="cxc">(\d+)</span>\s(?:seconds?)?</span>',
	101	webpage, 'countdown', default=None))
	102	if countdown:
	103	self._sleep(countdown, video_id)
	104
	105	post = compat_urllib_parse.urlencode(encode_dict(fields))
	106
	107	req = sanitized_Request(url, post)
	108	req.add_header('Content-type', 'application/x-www-form-urlencoded')
	109
	110	webpage = self._download_webpage(req, video_id, 'Downloading video page')
	111
	112	title = (self._search_regex(
	113	[r'style="z-index: [0-9]+;">([^<]+)</span>',
	114	r'<td nowrap>([^<]+)</td>',
	115	r'>Watch (.+) ',
	116	r'<h2 class="video-page-head">([^<]+)</h2>'],
	117	webpage, 'title', default=None) or self._og_search_title(webpage)).strip()
	118	video_url = self._search_regex(
	119	[r'file\s:\s["\'](http[^"\']+)["\'],',
	120	r'file_link\s=\s\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'],
	121	webpage, 'file url')
	122	thumbnail = self._search_regex(
	123	r'image\s:\s["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
	124
	125	formats = [{
	126	'format_id': 'sd',
	127	'url': video_url,
	128	'quality': 1,
	129	}]
	130
	131	return {
	132	'id': video_id,
	133	'title': title,
	134	'thumbnail': thumbnail,
	135	'formats': formats,
	136	}