yt_dlp/extractor/gofile.py

   1 import hashlib
   2
   3 from .common import InfoExtractor
   4 from ..utils import ExtractorError, try_get
   5
   6
   7 class GofileIE(InfoExtractor):
   8     _VALID_URL = r'https?://(?:www\.)?gofile\.io/d/(?P<id>[^/]+)'
   9     _TESTS = [{
  10         'url': 'https://gofile.io/d/AMZyDw',
  11         'info_dict': {
  12             'id': 'AMZyDw',
  13         },
  14         'playlist_mincount': 2,
  15         'playlist': [{
  16             'info_dict': {
  17                 'id': 'de571ac1-5edc-42e2-8ec2-bdac83ad4a31',
  18                 'filesize': 928116,
  19                 'ext': 'mp4',
  20                 'title': 'nuuh',
  21                 'release_timestamp': 1638338704,
  22                 'release_date': '20211201',
  23             }
  24         }]
  25     }, {
  26         'url': 'https://gofile.io/d/is8lKr',
  27         'info_dict': {
  28             'id': 'TMjXd9',
  29             'ext': 'mp4',
  30         },
  31         'playlist_count': 0,
  32         'skip': 'No video/audio found at provided URL.',
  33     }, {
  34         'url': 'https://gofile.io/d/TMjXd9',
  35         'info_dict': {
  36             'id': 'TMjXd9',
  37         },
  38         'playlist_count': 1,
  39     }, {
  40         'url': 'https://gofile.io/d/gqOtRf',
  41         'info_dict': {
  42             'id': 'gqOtRf',
  43         },
  44         'playlist_mincount': 1,
  45         'params': {
  46             'videopassword': 'password',
  47         },
  48     }]
  49     _TOKEN = None
  50
  51     def _real_initialize(self):
  52         token = self._get_cookies('https://gofile.io/').get('accountToken')
  53         if token:
  54             self._TOKEN = token.value
  55             return
  56
  57         account_data = self._download_json(
  58             'https://api.gofile.io/accounts', None, 'Getting a new guest account', data=b'{}')
  59         self._TOKEN = account_data['data']['token']
  60         self._set_cookie('.gofile.io', 'accountToken', self._TOKEN)
  61
  62     def _entries(self, file_id):
  63         query_params = {'wt': '4fd6sg89d7s6'}  # From https://gofile.io/dist/js/alljs.js
  64         password = self.get_param('videopassword')
  65         if password:
  66             query_params['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest()
  67         files = self._download_json(
  68             f'https://api.gofile.io/contents/{file_id}', file_id, 'Getting filelist',
  69             query=query_params, headers={'Authorization': f'Bearer {self._TOKEN}'})
  70
  71         status = files['status']
  72         if status == 'error-passwordRequired':
  73             raise ExtractorError(
  74                 'This video is protected by a password, use the --video-password option', expected=True)
  75         elif status != 'ok':
  76             raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True)
  77
  78         found_files = False
  79         for file in (try_get(files, lambda x: x['data']['children'], dict) or {}).values():
  80             file_type, file_format = file.get('mimetype').split('/', 1)
  81             if file_type not in ('video', 'audio') and file_format != 'vnd.mts':
  82                 continue
  83
  84             found_files = True
  85             file_url = file.get('link')
  86             if file_url:
  87                 yield {
  88                     'id': file['id'],
  89                     'title': file['name'].rsplit('.', 1)[0],
  90                     'url': file_url,
  91                     'filesize': file.get('size'),
  92                     'release_timestamp': file.get('createTime')
  93                 }
  94
  95         if not found_files:
  96             raise ExtractorError('No video/audio found at provided URL.', expected=True)
  97
  98     def _real_extract(self, url):
  99         file_id = self._match_id(url)
 100         return self.playlist_result(self._entries(file_id), playlist_id=file_id)