yt_dlp/extractor/crowdbunker.py

   1 import itertools
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     int_or_none,
   6     try_get,
   7     unified_strdate,
   8 )
   9
  10
  11 class CrowdBunkerIE(InfoExtractor):
  12     _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'
  13
  14     _TESTS = [{
  15         'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
  16         'info_dict': {
  17             'id': '0z4Kms8pi8I',
  18             'ext': 'mp4',
  19             'title': '117) Pass vax et solutions',
  20             'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
  21             'view_count': int,
  22             'duration': 5386,
  23             'uploader': 'Jérémie Mercier',
  24             'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
  25             'like_count': int,
  26             'upload_date': '20211218',
  27             'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg'
  28         },
  29         'params': {'skip_download': True}
  30     }]
  31
  32     def _real_extract(self, url):
  33         id = self._match_id(url)
  34         data_json = self._download_json(f'https://api.divulg.org/post/{id}/details',
  35                                         id, headers={'accept': 'application/json, text/plain, */*'})
  36         video_json = data_json['video']
  37         formats, subtitles = [], {}
  38         for sub in video_json.get('captions') or []:
  39             sub_url = try_get(sub, lambda x: x['file']['url'])
  40             if not sub_url:
  41                 continue
  42             subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({
  43                 'url': sub_url,
  44             })
  45
  46         mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
  47         if mpd_url:
  48             fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, id)
  49             formats.extend(fmts)
  50             subtitles = self._merge_subtitles(subtitles, subs)
  51         m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
  52         if m3u8_url:
  53             fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, id)
  54             formats.extend(fmts)
  55             subtitles = self._merge_subtitles(subtitles, subs)
  56
  57         thumbnails = [{
  58             'url': image['url'],
  59             'height': int_or_none(image.get('height')),
  60             'width': int_or_none(image.get('width')),
  61         } for image in video_json.get('thumbnails') or [] if image.get('url')]
  62
  63         self._sort_formats(formats)
  64         return {
  65             'id': id,
  66             'title': video_json.get('title'),
  67             'description': video_json.get('description'),
  68             'view_count': video_json.get('viewCount'),
  69             'duration': video_json.get('duration'),
  70             'uploader': try_get(data_json, lambda x: x['channel']['name']),
  71             'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
  72             'like_count': data_json.get('likesCount'),
  73             'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
  74             'thumbnails': thumbnails,
  75             'formats': formats,
  76             'subtitles': subtitles,
  77         }
  78
  79
  80 class CrowdBunkerChannelIE(InfoExtractor):
  81     _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'
  82
  83     _TESTS = [{
  84         'url': 'https://crowdbunker.com/@Milan_UHRIN',
  85         'playlist_mincount': 14,
  86         'info_dict': {
  87             'id': 'Milan_UHRIN',
  88         },
  89     }]
  90
  91     def _entries(self, id):
  92         last = None
  93
  94         for page in itertools.count():
  95             channel_json = self._download_json(
  96                 f'https://api.divulg.org/organization/{id}/posts', id, headers={'accept': 'application/json, text/plain, */*'},
  97                 query={'after': last} if last else {}, note=f'Downloading Page {page}')
  98             for item in channel_json.get('items') or []:
  99                 v_id = item.get('uid')
 100                 if not v_id:
 101                     continue
 102                 yield self.url_result(
 103                     'https://crowdbunker.com/v/%s' % v_id, ie=CrowdBunkerIE.ie_key(), video_id=v_id)
 104             last = channel_json.get('last')
 105             if not last:
 106                 break
 107
 108     def _real_extract(self, url):
 109         id = self._match_id(url)
 110         return self.playlist_result(self._entries(id), playlist_id=id)