[yt-dlp.git] / youtube_dl / extractor / moniker.py

# coding: utf-8
from __future__ import unicode_literals

import os.path
import re

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
)
from ..utils import (
    ExtractorError,
    remove_start,
)


class MonikerIE(InfoExtractor):
    """Extractor for videos hosted on allmyvideos.net and vidspot.net.

    Two page layouts are handled: pages embedding a ``builtin-`` iframe
    player, and pages whose hidden form fields are re-submitted via POST to
    obtain the video page.
    """

    IE_DESC = 'allmyvideos.net and vidspot.net'
    # The id group also matches 'embed-<id>'; the prefix is stripped in
    # _real_extract. The optional '2/v-' or 'v/v-' path prefix is not part
    # of the id.
    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [{
        'url': 'http://allmyvideos.net/jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://vidspot.net/l2ngsmhs8ci5',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'l2ngsmhs8ci5',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
        'only_matching': True,
    }, {
        'url': 'http://vidspot.net/2/v-ywDf99',
        'md5': '5f8254ce12df30479428b0152fb8e7ba',
        'info_dict': {
            'id': 'ywDf99',
            'ext': 'mp4',
            'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
            'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
        },
    }, {
        'url': 'http://allmyvideos.net/v/v-HXZm5t',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, description, formats) for *url*.

        Raises:
            ExtractorError: (expected) when the page contains
                '>File Not Found<' or a ``class="err"`` error message;
                (unexpected) when the hidden form has no ``fname`` field to
                derive the title from.
        """
        orig_video_id = self._match_id(url)
        # Embed URLs carry an 'embed-' prefix in the matched id; strip it
        # from both the id and the URL before downloading the page.
        video_id = remove_start(orig_video_id, 'embed-')
        url = url.replace(orig_video_id, video_id)
        # Sanity check: the rewritten URL must still match this extractor.
        assert re.match(self._VALID_URL, url) is not None
        orig_webpage = self._download_webpage(url, video_id)

        if '>File Not Found<' in orig_webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        error = self._search_regex(
            r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        builtin_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
            orig_webpage, 'builtin URL', default=None, group='url')

        if builtin_url:
            # Fetch the iframe player page, sending the original page as
            # Referer.
            req = compat_urllib_request.Request(builtin_url)
            req.add_header('Referer', url)
            webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
            title = self._og_search_title(orig_webpage).strip()
            # The og:description lookup is non-fatal and may return None
            # when the tag is absent, so only strip an actual value.
            description = self._og_search_description(orig_webpage)
            if description is not None:
                description = description.strip()
        else:
            # Re-submit the page's hidden form fields to get the video page.
            fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
            data = dict(fields)

            fname = data.get('fname')
            if not fname:
                raise ExtractorError('Unable to extract file name')

            # urlencode() output is percent-encoded ASCII; Python 3's urllib
            # requires the POST body to be bytes, not str.
            post = compat_urllib_parse.urlencode(data).encode('ascii')
            headers = {
                b'Content-Type': b'application/x-www-form-urlencoded',
            }
            req = compat_urllib_request.Request(url, post, headers)
            webpage = self._download_webpage(
                req, video_id, note='Downloading video page ...')

            # The title is the file name without its extension.
            title = os.path.splitext(fname)[0]
            description = None

        # Could be several links with different quality
        links = re.findall(r'"file" : "?(.+?)",', webpage)
        # Assume the links are ordered in quality
        formats = [{
            'url': link,
            'quality': quality,
        } for quality, link in enumerate(links)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }
Commit	Line	Data
38349518 CR	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
e825c380	4	import os.path
38349518 CR	5	import re
	6
	7	from .common import InfoExtractor
1cc79574	8	from ..compat import (
38349518 CR	9	compat_urllib_parse,
	10	compat_urllib_request,
	11	)
d0d6c097 YCH	12	from ..utils import (
	13	ExtractorError,
	14	remove_start,
	15	)
38349518 CR	16
38349518 CR	17
589d3d7c	18	class MonikerIE(InfoExtractor):
0529eef5	19	IE_DESC = 'allmyvideos.net and vidspot.net'
6fb8ace6	20	_VALID_URL = r'https?://(?:www\.)?(?:allmyvideos\|vidspot)\.net/(?:(?:2\|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'
38349518	21
0529eef5	22	_TESTS = [{
38349518	23	'url': 'http://allmyvideos.net/jih3nce3x6wn',
e825c380	24	'md5': '710883dee1bfc370ecf9fa6a89307c88',
38349518 CR	25	'info_dict': {
	26	'id': 'jih3nce3x6wn',
	27	'ext': 'mp4',
	28	'title': 'youtube-dl test video',
	29	},
d0d6c097 YCH	30	}, {
	31	'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
	32	'md5': '710883dee1bfc370ecf9fa6a89307c88',
	33	'info_dict': {
	34	'id': 'jih3nce3x6wn',
	35	'ext': 'mp4',
	36	'title': 'youtube-dl test video',
	37	},
0529eef5 PH	38	}, {
	39	'url': 'http://vidspot.net/l2ngsmhs8ci5',
	40	'md5': '710883dee1bfc370ecf9fa6a89307c88',
	41	'info_dict': {
	42	'id': 'l2ngsmhs8ci5',
	43	'ext': 'mp4',
	44	'title': 'youtube-dl test video',
	45	},
37bfe8ac PH	46	}, {
	47	'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
	48	'only_matching': True,
721f5a27 S	49	}, {
	50	'url': 'http://vidspot.net/2/v-ywDf99',
	51	'md5': '5f8254ce12df30479428b0152fb8e7ba',
	52	'info_dict': {
	53	'id': 'ywDf99',
	54	'ext': 'mp4',
	55	'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
	56	'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
	57	},
	58	}, {
	59	'url': 'http://allmyvideos.net/v/v-HXZm5t',
	60	'only_matching': True,
0529eef5	61	}]
38349518 CR	62
38349518 CR	63	def _real_extract(self, url):
d0d6c097 YCH	64	orig_video_id = self._match_id(url)
	65	video_id = remove_start(orig_video_id, 'embed-')
	66	url = url.replace(orig_video_id, video_id)
	67	assert re.match(self._VALID_URL, url) is not None
7cdd5339	68	orig_webpage = self._download_webpage(url, video_id)
1cc79574	69
2419a376 S	70	if '>File Not Found<' in orig_webpage:
	71	raise ExtractorError('Video %s does not exist' % video_id, expected=True)
	72
e206740f S	73	error = self._search_regex(
	74	r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
	75	if error:
	76	raise ExtractorError(
	77	'%s returned error: %s' % (self.IE_NAME, error), expected=True)
	78
6fb8ace6 S	79	builtin_url = self._search_regex(
	80	r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
	81	orig_webpage, 'builtin URL', default=None, group='url')
7cdd5339	82
6fb8ace6 S	83	if builtin_url:
	84	req = compat_urllib_request.Request(builtin_url)
	85	req.add_header('Referer', url)
	86	webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
	87	title = self._og_search_title(orig_webpage).strip()
	88	description = self._og_search_description(orig_webpage).strip()
	89	else:
	90	fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
	91	data = dict(fields)
	92
	93	post = compat_urllib_parse.urlencode(data)
	94	headers = {
	95	b'Content-Type': b'application/x-www-form-urlencoded',
	96	}
	97	req = compat_urllib_request.Request(url, post, headers)
	98	webpage = self._download_webpage(
	99	req, video_id, note='Downloading video page ...')
e825c380	100
6fb8ace6 S	101	title = os.path.splitext(data['fname'])[0]
6fb8ace6 S	102	description = None
7cdd5339	103
5f6a1245	104	# Could be several links with different quality
7cdd5339	105	links = re.findall(r'"file" : "?(.+?)",', webpage)
e825c380 PH	106	# Assume the links are ordered in quality
	107	formats = [{
	108	'url': l,
	109	'quality': i,
	110	} for i, l in enumerate(links)]
	111	self._sort_formats(formats)
7cdd5339 CR	112
	113	return {
	114	'id': video_id,
e825c380	115	'title': title,
6fb8ace6	116	'description': description,
e825c380 PH	117	'formats': formats,
e825c380 PH	118	}