]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/veehd.py
[extractor/youtube] Add `piped.video` (#5571)
[yt-dlp.git] / yt_dlp / extractor / veehd.py
CommitLineData
8e4e89f1
JMF
1import re
2import json
3
4from .common import InfoExtractor
1cc79574 5from ..compat import (
6b19647d 6 compat_urllib_parse_unquote,
8e4e89f1 7 compat_urlparse,
1cc79574
PH
8)
9from ..utils import (
d5bb814d 10 ExtractorError,
8e4e89f1 11 clean_html,
1cc79574 12 get_element_by_id,
8e4e89f1
JMF
13)
14
c11a0611 15
class VeeHDIE(InfoExtractor):
    """Extractor for videos hosted on veehd.com."""

    _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'

    # Seems VeeHD videos have multiple copies on several servers, all of
    # whom have different MD5 checksums, so omit md5 field in all tests
    _TESTS = [{
        'url': 'http://veehd.com/video/4639434_Solar-Sinter',
        'info_dict': {
            'id': '4639434',
            'ext': 'mp4',
            'title': 'Solar Sinter',
            'uploader_id': 'VideoEyes',
            'description': 'md5:46a840e8692ddbaffb5f81d9885cb457',
        },
        'skip': 'Video deleted',
    }, {
        'url': 'http://veehd.com/video/4905758_Elysian-Fields-Channeling',
        'info_dict': {
            'id': '4905758',
            'ext': 'mp4',
            'title': 'Elysian Fields - Channeling',
            'description': 'md5:360e4e95fdab58aefbea0f2a19e5604b',
            'uploader_id': 'spotted',
        }
    }, {
        'url': 'http://veehd.com/video/2046729_2012-2009-DivX-Trailer',
        'info_dict': {
            'id': '2046729',
            'ext': 'avi',
            'title': '2012 (2009) DivX Trailer',
            'description': 'md5:75435ee95255e6a9838ac6f6f3a2396b',
            'uploader_id': 'Movie_Trailers',
        }
    }]

    def _real_extract(self, url):
        """Resolve a VeeHD video page into an info dict.

        The media URL is looked up in three places, in order: the player
        page's ``config=`` JSON, a DivX ``<embed>`` tag on the player page,
        and finally a ``file: '...'`` entry on a nested iframe page.

        Raises ExtractorError when the video has been removed, or when the
        player path, the media URL or the title cannot be found.  Missing
        uploader, thumbnail or description only result in ``None`` values.
        """
        video_id = self._match_id(url)

        # VeeHD seems to send garbage on the first request.
        # See https://github.com/ytdl-org/youtube-dl/issues/2102
        self._download_webpage(url, video_id, 'Requesting webpage')
        webpage = self._download_webpage(url, video_id)

        if 'This video has been removed<' in webpage:
            raise ExtractorError('Video %s has been removed' % video_id, expected=True)

        player_path = self._search_regex(
            r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
            webpage, 'player path')
        player_url = compat_urlparse.urljoin(url, player_path)

        # Same request-twice pattern as for the main page above.
        self._download_webpage(player_url, video_id, 'Requesting player page')
        player_page = self._download_webpage(
            player_url, video_id, 'Downloading player page')

        video_url = None

        # Source 1: flash player configuration embedded as JSON.
        config_json = self._search_regex(
            r'value=\'config=({.+?})\'', player_page, 'config json', default=None)

        if config_json:
            config = json.loads(config_json)
            # A config without clip.url is not fatal: the sources below
            # still get a chance instead of dying on a KeyError here.
            clip_url = (config.get('clip') or {}).get('url')
            if clip_url:
                video_url = compat_urllib_parse_unquote(clip_url)

        # Source 2: DivX plugin embed.
        if not video_url:
            video_url = self._html_search_regex(
                r'<embed[^>]+type="video/divx"[^>]+src="([^"]+)"',
                player_page, 'video url', default=None)

        # Source 3: nested iframe page; this is the last resort, so both
        # lookups are fatal.
        if not video_url:
            iframe_src = self._search_regex(
                r'<iframe[^>]+src="/?([^"]+)"', player_page, 'iframe url')
            iframe_url = 'http://veehd.com/%s' % iframe_src

            self._download_webpage(iframe_url, video_id, 'Requesting iframe page')
            iframe_page = self._download_webpage(
                iframe_url, video_id, 'Downloading iframe page')

            video_url = self._search_regex(
                r"file\s*:\s*'([^']+)'", iframe_page, 'video url')

        # get_element_by_id() returns None when the element is absent;
        # report that as an extraction error rather than an AttributeError.
        video_name = get_element_by_id('videoName', webpage)
        if video_name is None:
            raise ExtractorError('Unable to extract title')
        # Keep the part before the last '|'; when there is no separator at
        # all, use the whole text instead of ending up with an empty title.
        name_head, separator, name_tail = video_name.rpartition('|')
        title = clean_html(name_head if separator else name_tail)

        # The remaining fields are optional metadata: a failed match must
        # not abort an otherwise successful extraction.
        uploader_id = self._html_search_regex(
            r'<a href="/profile/\d+">(.+?)</a>',
            webpage, 'uploader', fatal=False)
        thumbnail = self._search_regex(
            r'<img id="veehdpreview" src="(.+?)"',
            webpage, 'thumbnail', fatal=False)
        description = self._html_search_regex(
            r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, 'description', flags=re.DOTALL, fatal=False)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }