yt_dlp/extractor/crowdbunker.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import itertools
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     int_or_none,
   9     try_get,
  10     unified_strdate,
  11 )
  12
  13
  14 class CrowdBunkerIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'
  16
  17     _TESTS = [{
  18         'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
  19         'info_dict': {
  20             'id': '0z4Kms8pi8I',
  21             'ext': 'mp4',
  22             'title': '117) Pass vax et solutions',
  23             'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
  24             'view_count': int,
  25             'duration': 5386,
  26             'uploader': 'Jérémie Mercier',
  27             'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
  28             'like_count': int,
  29             'upload_date': '20211218',
  30             'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg'
  31         },
  32         'params': {'skip_download': True}
  33     }]
  34
  35     def _real_extract(self, url):
  36         id = self._match_id(url)
  37         data_json = self._download_json(f'https://api.divulg.org/post/{id}/details',
  38                                         id, headers={'accept': 'application/json, text/plain, */*'})
  39         video_json = data_json['video']
  40         formats, subtitles = [], {}
  41         for sub in video_json.get('captions') or []:
  42             sub_url = try_get(sub, lambda x: x['file']['url'])
  43             if not sub_url:
  44                 continue
  45             subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({
  46                 'url': sub_url,
  47             })
  48
  49         mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
  50         if mpd_url:
  51             fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, id)
  52             formats.extend(fmts)
  53             subtitles = self._merge_subtitles(subtitles, subs)
  54         m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
  55         if m3u8_url:
  56             fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, id)
  57             formats.extend(fmts)
  58             subtitles = self._merge_subtitles(subtitles, subs)
  59
  60         thumbnails = [{
  61             'url': image['url'],
  62             'height': int_or_none(image.get('height')),
  63             'width': int_or_none(image.get('width')),
  64         } for image in video_json.get('thumbnails') or [] if image.get('url')]
  65
  66         self._sort_formats(formats)
  67         return {
  68             'id': id,
  69             'title': video_json.get('title'),
  70             'description': video_json.get('description'),
  71             'view_count': video_json.get('viewCount'),
  72             'duration': video_json.get('duration'),
  73             'uploader': try_get(data_json, lambda x: x['channel']['name']),
  74             'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
  75             'like_count': data_json.get('likesCount'),
  76             'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
  77             'thumbnails': thumbnails,
  78             'formats': formats,
  79             'subtitles': subtitles,
  80         }
  81
  82
  83 class CrowdBunkerChannelIE(InfoExtractor):
  84     _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'
  85
  86     _TESTS = [{
  87         'url': 'https://crowdbunker.com/@Milan_UHRIN',
  88         'playlist_mincount': 14,
  89         'info_dict': {
  90             'id': 'Milan_UHRIN',
  91         },
  92     }]
  93
  94     def _entries(self, id):
  95         last = None
  96
  97         for page in itertools.count():
  98             channel_json = self._download_json(
  99                 f'https://api.divulg.org/organization/{id}/posts', id, headers={'accept': 'application/json, text/plain, */*'},
 100                 query={'after': last} if last else {}, note=f'Downloading Page {page}')
 101             for item in channel_json.get('items') or []:
 102                 v_id = item.get('uid')
 103                 if not v_id:
 104                     continue
 105                 yield self.url_result(
 106                     'https://crowdbunker.com/v/%s' % v_id, ie=CrowdBunkerIE.ie_key(), video_id=v_id)
 107             last = channel_json.get('last')
 108             if not last:
 109                 break
 110
 111     def _real_extract(self, url):
 112         id = self._match_id(url)
 113         return self.playlist_result(self._entries(id), playlist_id=id)