yt_dlp/extractor/porn91.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     parse_duration,
   4     int_or_none,
   5     ExtractorError,
   6 )
   7
   8
   9 class Porn91IE(InfoExtractor):
  10     IE_NAME = '91porn'
  11     _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)'
  12
  13     _TEST = {
  14         'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
  15         'md5': '7fcdb5349354f40d41689bd0fa8db05a',
  16         'info_dict': {
  17             'id': '7e42283b4f5ab36da134',
  18             'title': '18岁大一漂亮学妹，水嫩性感，再爽一次！',
  19             'ext': 'mp4',
  20             'duration': 431,
  21             'age_limit': 18,
  22         }
  23     }
  24
  25     def _real_extract(self, url):
  26         video_id = self._match_id(url)
  27         self._set_cookie('91porn.com', 'language', 'cn_CN')
  28
  29         webpage = self._download_webpage(
  30             'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
  31
  32         if '作为游客，你每天只可观看10个视频' in webpage:
  33             raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True)
  34
  35         title = self._search_regex(
  36             r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
  37         title = title.replace('\n', '')
  38
  39         video_link_url = self._search_regex(
  40             r'<textarea[^>]+id=["\']fm-video_link[^>]+>([^<]+)</textarea>',
  41             webpage, 'video link')
  42         videopage = self._download_webpage(video_link_url, video_id)
  43
  44         info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0]
  45
  46         duration = parse_duration(self._search_regex(
  47             r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
  48
  49         comment_count = int_or_none(self._search_regex(
  50             r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False))
  51
  52         info_dict.update({
  53             'id': video_id,
  54             'title': title,
  55             'duration': duration,
  56             'comment_count': comment_count,
  57             'age_limit': self._rta_search(webpage),
  58         })
  59
  60         return info_dict