jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import re
	4
	5	from .common import InfoExtractor
	6	from ..utils import (
	7	parse_duration,
	8	sanitized_Request,
	9	unified_strdate,
	10	)
	11
	12
	13	class NuvidIE(InfoExtractor):
	14	_VALID_URL = r'https?://(?:www\|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
	15	_TEST = {
	16	'url': 'http://m.nuvid.com/video/1310741/',
	17	'md5': 'eab207b7ac4fccfb4e23c86201f11277',
	18	'info_dict': {
	19	'id': '1310741',
	20	'ext': 'mp4',
	21	'title': 'Horny babes show their awesome bodeis and',
	22	'duration': 129,
	23	'upload_date': '20140508',
	24	'age_limit': 18,
	25	}
	26	}
	27
	28	def _real_extract(self, url):
	29	video_id = self._match_id(url)
	30
	31	formats = []
	32
	33	for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]:
	34	request = sanitized_Request(
	35	'http://m.nuvid.com/play/%s' % video_id)
	36	request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed)
	37	webpage = self._download_webpage(
	38	request, video_id, 'Downloading %s page' % format_id)
	39	video_url = self._html_search_regex(
	40	r'<a\s+href="([^"]+)"\s+class="b_link">', webpage, '%s video URL' % format_id, fatal=False)
	41	if not video_url:
	42	continue
	43	formats.append({
	44	'url': video_url,
	45	'format_id': format_id,
	46	})
	47
	48	webpage = self._download_webpage(
	49	'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page')
	50	title = self._html_search_regex(
	51	[r'<span title="([^"]+)">',
	52	r'<div class="thumb-holder video">\s<h5[^>]>([^<]+)</h5>'], webpage, 'title').strip()
	53	thumbnails = [
	54	{
	55	'url': thumb_url,
	56	} for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
	57	]
	58	thumbnail = thumbnails[0]['url'] if thumbnails else None
	59	duration = parse_duration(self._html_search_regex(
	60	r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})', webpage, 'duration', fatal=False))
	61	upload_date = unified_strdate(self._html_search_regex(
	62	r'<i class="fa fa-user"></i>\s*(\d{4}-\d{2}-\d{2})', webpage, 'upload date', fatal=False))
	63
	64	return {
	65	'id': video_id,
	66	'title': title,
	67	'thumbnails': thumbnails,
	68	'thumbnail': thumbnail,
	69	'duration': duration,
	70	'upload_date': upload_date,
	71	'age_limit': 18,
	72	'formats': formats,
	73	}