jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/promptfile.py
[promptfile] Fix extraction (Closes #10634)
[yt-dlp.git] / youtube_dl / extractor / promptfile.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 determine_ext,
9 ExtractorError,
10 sanitized_Request,
11 urlencode_postdata,
12 )
13
14
class PromptFileIE(InfoExtractor):
    """Information extractor for promptfile.com ``/l/<id>`` links."""

    _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
    _TEST = {
        'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416',
        'md5': '5a7e285a26e0d66d9a263fae91bc92ce',
        'info_dict': {
            'id': '86D1CE8462-576CAAE416',
            'ext': 'mp4',
            'title': 'oceans.mp4',
            # Raw literal: '\.' is not a valid escape sequence in a plain
            # string and triggers a warning on recent Python versions.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video behind *url*.

        The landing page is downloaded first. Its script contains a string
        that is prepended to the value of a hidden form field; the resulting
        form is then POSTed back to the same URL, and the file link, title
        and thumbnail are scraped from the response.

        Raises:
            ExtractorError: if the page carries the "not found" message
                (flagged as expected), if the page exposes no hidden form
                fields, or if a mandatory pattern cannot be matched.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
            raise ExtractorError(
                'Video %s does not exist' % video_id, expected=True)

        # The page script builds the real field value as
        # "<prefix>" + $("#chash").val(); capture that prefix.
        chash = self._html_search_regex(
            r'\$\("#chash"\)\.val\("(.+)"\+\$\("#chash"\)', webpage, 'chash')

        fields = self._hidden_inputs(webpage)
        if not fields:
            # Without this guard the lookup below would fail with a bare
            # IndexError/StopIteration instead of an extractor error.
            raise ExtractorError('Unable to extract hidden form fields')
        # The prefix is applied to the first hidden input found on the page.
        chash_key = next(iter(fields))
        fields[chash_key] = chash + fields[chash_key]

        request = sanitized_Request(url, urlencode_postdata(fields))
        request.add_header(
            'Content-type', 'application/x-www-form-urlencoded')
        webpage = self._download_webpage(
            request, video_id, 'Downloading video page')

        # Keep the page URL and the media URL in separate names.
        video_url = self._html_search_regex(
            r'<a href="(http://www\.promptfile\.com/file/[^"]+)',
            webpage, 'URL')
        title = self._html_search_regex(
            r'<span.+title="([^"]+)">', webpage, 'title')
        thumbnail = self._html_search_regex(
            r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
            webpage, 'thumbnail', fatal=False, flags=re.DOTALL)

        formats = [{
            'format_id': 'sd',
            'url': video_url,
            # The title is used as the file name to derive the extension.
            'ext': determine_ext(title),
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }