yt_dlp/extractor/yapfiles.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     int_or_none,
   7     qualities,
   8     unescapeHTML,
   9     url_or_none,
  10 )
  11
  12
  13 class YapFilesIE(InfoExtractor):
  14     _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
  15     _VALID_URL = r'https?:%s' % _YAPFILES_URL
  16     _TESTS = [{
  17         # with hd
  18         'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
  19         'md5': '2db19e2bfa2450568868548a1aa1956c',
  20         'info_dict': {
  21             'id': 'vMDE1NjcyNDUt0413',
  22             'ext': 'mp4',
  23             'title': 'Самый худший пароль WIFI',
  24             'thumbnail': r're:^https?://.*\.jpg$',
  25             'duration': 72,
  26         },
  27     }, {
  28         # without hd
  29         'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
  30         'only_matching': True,
  31     }]
  32
  33     @staticmethod
  34     def _extract_urls(webpage):
  35         return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
  36             r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
  37             % YapFilesIE._YAPFILES_URL, webpage)]
  38
  39     def _real_extract(self, url):
  40         video_id = self._match_id(url)
  41
  42         webpage = self._download_webpage(url, video_id, fatal=False)
  43
  44         player_url = None
  45         query = {}
  46         if webpage:
  47             player_url = self._search_regex(
  48                 r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
  49                 'player url', default=None, group='url')
  50
  51         if not player_url:
  52             player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
  53             query = {
  54                 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
  55                 'type': 'json',
  56                 'ref': url,
  57             }
  58
  59         player = self._download_json(
  60             player_url, video_id, query=query)['player']
  61
  62         playlist_url = player['playlist']
  63         title = player['title']
  64         thumbnail = player.get('poster')
  65
  66         if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
  67             raise ExtractorError(
  68                 'Video %s has been removed' % video_id, expected=True)
  69
  70         playlist = self._download_json(
  71             playlist_url, video_id)['player']['main']
  72
  73         hd_height = int_or_none(player.get('hd'))
  74
  75         QUALITIES = ('sd', 'hd')
  76         quality_key = qualities(QUALITIES)
  77         formats = []
  78         for format_id in QUALITIES:
  79             is_hd = format_id == 'hd'
  80             format_url = url_or_none(playlist.get(
  81                 'file%s' % ('_hd' if is_hd else '')))
  82             if not format_url:
  83                 continue
  84             formats.append({
  85                 'url': format_url,
  86                 'format_id': format_id,
  87                 'quality': quality_key(format_id),
  88                 'height': hd_height if is_hd else None,
  89             })
  90         self._sort_formats(formats)
  91
  92         return {
  93             'id': video_id,
  94             'title': title,
  95             'thumbnail': thumbnail,
  96             'duration': int_or_none(player.get('length')),
  97             'formats': formats,
  98         }