]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/moniker.py
Merge pull request #8061 from dstftw/introduce-chapter-and-series-fields
[yt-dlp.git] / youtube_dl / extractor / moniker.py
CommitLineData
38349518
CR
1# coding: utf-8
2from __future__ import unicode_literals
3
e825c380 4import os.path
38349518
CR
5import re
6
7from .common import InfoExtractor
5c2266df 8from ..compat import compat_urllib_parse
d0d6c097
YCH
9from ..utils import (
10 ExtractorError,
11 remove_start,
5c2266df 12 sanitized_Request,
d0d6c097 13)
38349518
CR
14
15
class MonikerIE(InfoExtractor):
    """Information extractor for allmyvideos.net and vidspot.net.

    Two page layouts are handled by ``_real_extract``:

    * pages that embed an ``<iframe>`` whose ``src`` contains ``/builtin-``:
      the iframe page is fetched (with the watch page as ``Referer``) and
      title/description are taken from the watch page's Open Graph data;
    * all other pages: the hidden form fields of the watch page are posted
      back to the same URL and the title is derived from the ``fname``
      field.

    In both cases the media URLs are read from the ``"file" : "..."``
    entries of the resulting page.
    """

    IE_DESC = 'allmyvideos.net and vidspot.net'
    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [{
        'url': 'http://allmyvideos.net/jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://vidspot.net/l2ngsmhs8ci5',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'l2ngsmhs8ci5',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
        'only_matching': True,
    }, {
        'url': 'http://vidspot.net/2/v-ywDf99',
        'md5': '5f8254ce12df30479428b0152fb8e7ba',
        'info_dict': {
            'id': 'ywDf99',
            'ext': 'mp4',
            'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
            'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
        },
    }, {
        'url': 'http://allmyvideos.net/v/v-HXZm5t',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the video id, title, description and formats for *url*.

        Raises ExtractorError when the page reports a missing file or an
        error message, when the rewritten URL no longer matches
        ``_VALID_URL``, or when the download form lacks the ``fname`` field.
        """
        orig_video_id = self._match_id(url)
        # Embed URLs carry an 'embed-' prefix on the id; strip it and use
        # the plain watch page instead.
        video_id = remove_start(orig_video_id, 'embed-')
        url = url.replace(orig_video_id, video_id)
        # Explicit check rather than `assert`, which is stripped under -O.
        if re.match(self._VALID_URL, url) is None:
            raise ExtractorError('Invalid URL after normalization: %s' % url)
        orig_webpage = self._download_webpage(url, video_id)

        if '>File Not Found<' in orig_webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        error = self._search_regex(
            r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        builtin_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
            orig_webpage, 'builtin URL', default=None, group='url')

        if builtin_url:
            req = sanitized_Request(builtin_url)
            req.add_header('Referer', url)
            webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
            title = self._og_search_title(orig_webpage).strip()
            # NOTE(review): guard against the description lookup yielding
            # nothing; the helper's not-found behaviour is not visible in
            # this file, so avoid calling .strip() on a non-string.
            description = self._og_search_description(orig_webpage)
            if description is not None:
                description = description.strip()
        else:
            fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
            data = dict(fields)

            # urllib on Python 3 rejects a text POST body; urlencode output
            # is percent-escaped ASCII, so encoding it is lossless.
            post = compat_urllib_parse.urlencode(data).encode('ascii')
            headers = {
                b'Content-Type': b'application/x-www-form-urlencoded',
            }
            req = sanitized_Request(url, post, headers)
            webpage = self._download_webpage(
                req, video_id, note='Downloading video page ...')

            file_name = data.get('fname')
            if file_name is None:
                # Report a missing form field as an extractor failure
                # instead of leaking a bare KeyError.
                raise ExtractorError('Unable to extract title: no "fname" field in download form')
            title = os.path.splitext(file_name)[0]
            description = None

        # Could be several links with different quality
        links = re.findall(r'"file" : "?(.+?)",', webpage)
        # Assume the links are ordered in quality
        formats = [{
            'url': link_url,
            'quality': quality,
        } for quality, link_url in enumerate(links)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }