]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/yapfiles.py
8fabdf81c95fba6e39bd039b4aa348a61414ece3
[yt-dlp.git] / yt_dlp / extractor / yapfiles.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 int_or_none,
7 qualities,
8 unescapeHTML,
9 url_or_none,
10 )
11
12
class YapFilesIE(InfoExtractor):
    """Information extractor for yapfiles.ru ``get_player`` video pages."""

    # Scheme-less pattern of a player URL; the ``id`` group captures the value
    # of the ``v`` query parameter. It is reused by _VALID_URL and by the
    # iframe scan in _extract_urls.
    _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
    _VALID_URL = r'https?:%s' % _YAPFILES_URL
    _TESTS = [{
        # with hd
        'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
        'md5': '2db19e2bfa2450568868548a1aa1956c',
        'info_dict': {
            'id': 'vMDE1NjcyNDUt0413',
            'ext': 'mp4',
            'title': 'Самый худший пароль WIFI',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 72,
        },
    }, {
        # without hd
        'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the HTML-unescaped ``src`` of every yapfiles player iframe.

        The ``src`` may omit the scheme (``//...``); the value is returned as
        written in the page, only passed through ``unescapeHTML``.
        """
        iframe_pattern = (
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
            % YapFilesIE._YAPFILES_URL)
        embed_urls = []
        for match in re.finditer(iframe_pattern, webpage):
            embed_urls.append(unescapeHTML(match.group('url')))
        return embed_urls

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Raises ExtractorError (flagged as expected) when the player metadata
        carries the removed-video title or a ``deleted.jpg`` poster.
        """
        video_id = self._match_id(url)

        # The page download is non-fatal: when it fails, or when the page has
        # no ``player.init(...)`` call, the ``load`` API endpoint is used.
        webpage = self._download_webpage(url, video_id, fatal=False)

        if webpage:
            player_url = self._search_regex(
                r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
                'player url', default=None, group='url')
        else:
            player_url = None

        if player_url:
            # URL taken from the page is requested without extra parameters.
            query = {}
        else:
            player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
            query = {
                # Fixed value sent as-is; its meaning is not visible here.
                'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
                'type': 'json',
                'ref': url,
            }

        player_response = self._download_json(
            player_url, video_id, query=query)
        player = player_response['player']

        # 'playlist' and 'title' are mandatory; 'poster' is optional.
        playlist_url = player['playlist']
        title = player['title']
        thumbnail = player.get('poster')

        # 'Ролик удален' is Russian for "video removed".
        is_removed = (
            title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''))
        if is_removed:
            raise ExtractorError(
                'Video %s has been removed' % video_id, expected=True)

        playlist_response = self._download_json(playlist_url, video_id)
        playlist = playlist_response['player']['main']

        # The 'hd' field of the player metadata is used as the height of the
        # hd rendition; the sd rendition gets no height.
        hd_height = int_or_none(player.get('hd'))

        QUALITIES = ('sd', 'hd')
        quality_key = qualities(QUALITIES)
        formats = []
        for format_id in QUALITIES:
            is_hd = format_id == 'hd'
            # sd lives under 'file', hd under 'file_hd'.
            suffix = '_hd' if is_hd else ''
            format_url = url_or_none(playlist.get('file%s' % suffix))
            if format_url:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                    'quality': quality_key(format_id),
                    'height': hd_height if is_hd else None,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': int_or_none(player.get('length')),
            'formats': formats,
        }