yt_dlp/extractor/hitrecord.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     clean_html,
   4     float_or_none,
   5     int_or_none,
   6     try_get,
   7 )
   8
   9
  10 class HitRecordIE(InfoExtractor):
  11     _VALID_URL = r'https?://(?:www\.)?hitrecord\.org/records/(?P<id>\d+)'
  12     _TEST = {
  13         'url': 'https://hitrecord.org/records/2954362',
  14         'md5': 'fe1cdc2023bce0bbb95c39c57426aa71',
  15         'info_dict': {
  16             'id': '2954362',
  17             'ext': 'mp4',
  18             'title': 'A Very Different World (HITRECORD x ACLU)',
  19             'description': 'md5:e62defaffab5075a5277736bead95a3d',
  20             'duration': 139.327,
  21             'timestamp': 1471557582,
  22             'upload_date': '20160818',
  23             'uploader': 'Zuzi.C12',
  24             'uploader_id': '362811',
  25             'view_count': int,
  26             'like_count': int,
  27             'comment_count': int,
  28             'tags': list,
  29         },
  30     }
  31
  32     def _real_extract(self, url):
  33         video_id = self._match_id(url)
  34
  35         video = self._download_json(
  36             f'https://hitrecord.org/api/web/records/{video_id}', video_id)
  37
  38         title = video['title']
  39         video_url = video['source_url']['mp4_url']
  40
  41         tags = None
  42         tags_list = try_get(video, lambda x: x['tags'], list)
  43         if tags_list:
  44             tags = [
  45                 t['text']
  46                 for t in tags_list
  47                 if isinstance(t, dict) and t.get('text')
  48                 and isinstance(t['text'], str)]
  49
  50         return {
  51             'id': video_id,
  52             'url': video_url,
  53             'title': title,
  54             'description': clean_html(video.get('body')),
  55             'duration': float_or_none(video.get('duration'), 1000),
  56             'timestamp': int_or_none(video.get('created_at_i')),
  57             'uploader': try_get(
  58                 video, lambda x: x['user']['username'], str),
  59             'uploader_id': try_get(
  60                 video, lambda x: str(x['user']['id'])),
  61             'view_count': int_or_none(video.get('total_views_count')),
  62             'like_count': int_or_none(video.get('hearts_count')),
  63             'comment_count': int_or_none(video.get('comments_count')),
  64             'tags': tags,
  65         }