]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/doodstream.py
[generic] Extract subtitles from video.js (#3156)
[yt-dlp.git] / yt_dlp / extractor / doodstream.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import string
5 import random
6 import time
7
8 from .common import InfoExtractor
9
10
class DoodStreamIE(InfoExtractor):
    """Extractor for DoodStream videos served from dood.to / dood.watch.

    Both the embed (``/e/<id>``) and the download (``/d/<id>``) URL forms
    are accepted; the captured ``id`` group is the video identifier.
    """

    _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P<id>[a-z0-9]+)'
    _TESTS = [
        {
            'url': 'http://dood.to/e/5s1wmbdacezb',
            'md5': '4568b83b31e13242b3f1ff96c55f0595',
            'info_dict': {
                'id': '5s1wmbdacezb',
                'ext': 'mp4',
                'title': 'Kat Wonders - Monthly May 2020',
                'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
                'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
            },
        },
        {
            'url': 'http://dood.watch/d/5s1wmbdacezb',
            'md5': '4568b83b31e13242b3f1ff96c55f0595',
            'info_dict': {
                'id': '5s1wmbdacezb',
                'ext': 'mp4',
                'title': 'Kat Wonders - Monthly May 2020',
                'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
                'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
            },
        },
        {
            'url': 'https://dood.to/d/jzrxn12t2s7n',
            'md5': '3207e199426eca7c2aa23c2872e6728a',
            'info_dict': {
                'id': 'jzrxn12t2s7n',
                'ext': 'mp4',
                'title': 'Stacy Cruz Cute ALLWAYSWELL',
                'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
                'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a single DoodStream video.

        The embed page on dood.to is fetched for the metadata, the access
        token and the ``/pass_md5...`` path. The response body of that path
        is then combined with ten random alphanumeric characters, the token
        and a millisecond timestamp to form the media URL.

        The token and ``pass_md5`` lookups use no ``default``; title,
        thumbnail and description fall back to ``None`` when absent.
        """
        video_id = self._match_id(url)
        # Whatever host/path variant was matched, the dood.to embed page is
        # the one that gets downloaded (and later sent as the referer).
        embed_url = f'https://dood.to/e/{video_id}'
        webpage = self._download_webpage(embed_url, video_id)

        title = self._html_search_meta(
            ['og:title', 'twitter:title'], webpage, default=None)
        thumbnail = self._html_search_meta(
            ['og:image', 'twitter:image'], webpage, default=None)
        token = self._html_search_regex(
            r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
        description = self._html_search_meta(
            ['og:description', 'description', 'twitter:description'],
            webpage, default=None)

        # The same headers are used for the pass_md5 request and handed to
        # the downloader through 'http_headers'.
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
            'referer': embed_url
        }

        pass_md5_path = self._html_search_regex(
            r'(/pass_md5.*?)\'', webpage, 'pass_md5')

        # Order matters only for parity with the page flow being imitated:
        # fetch the URL prefix first, then draw the suffix and the timestamp.
        url_prefix = self._download_webpage(
            f'https://dood.to{pass_md5_path}', video_id, headers=request_headers)
        alphabet = string.ascii_letters + string.digits
        random_suffix = [random.choice(alphabet) for _ in range(10)]
        query = f'?token={token}&expiry={int(time.time() * 1000)}'
        media_url = ''.join((url_prefix, *random_suffix, query))

        return {
            'id': video_id,
            'title': title,
            'url': media_url,
            'http_headers': request_headers,
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }