]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/crowdbunker.py
[CrowdBunker] Add extractors (#2407)
[yt-dlp.git] / yt_dlp / extractor / crowdbunker.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import itertools
5
6 from .common import InfoExtractor
7 from ..utils import (
8 int_or_none,
9 try_get,
10 unified_strdate,
11 )
12
13
class CrowdBunkerIE(InfoExtractor):
    """Extractor for single crowdbunker.com video pages (``/v/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'

    _TESTS = [{
        'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
        'info_dict': {
            'id': '0z4Kms8pi8I',
            'ext': 'mp4',
            'title': '117) Pass vax et solutions',
            'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
            'view_count': int,
            'duration': 5386,
            'uploader': 'Jérémie Mercier',
            'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
            'like_count': int,
            'upload_date': '20211218',
            'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg'
        },
        'params': {'skip_download': True}
    }]

    def _real_extract(self, url):
        """Build the info dict for one video from the divulg.org post-details API.

        The video id is taken from ``url``; captions, DASH/HLS formats,
        thumbnails and basic metadata are read from the JSON response.
        The response's ``'video'`` entry is accessed unconditionally, so a
        response without it is not handled here.
        """
        video_id = self._match_id(url)
        data_json = self._download_json(
            f'https://api.divulg.org/post/{video_id}/details', video_id,
            headers={'accept': 'application/json, text/plain, */*'})
        video_json = data_json['video']

        formats, subtitles = [], {}

        # Captions listed directly in the API response; entries without a
        # file URL are skipped.
        for sub in video_json.get('captions') or []:
            sub_url = try_get(sub, lambda x: x['file']['url'])
            if not sub_url:
                continue
            # Fall back to 'fr' when the language code is missing *or* null/empty,
            # so subtitles are never keyed under None.
            lang = sub.get('languageCode') or 'fr'
            subtitles.setdefault(lang, []).append({
                'url': sub_url,
            })

        mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
        if mpd_url:
            fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
        if m3u8_url:
            # Parse the HLS manifest itself (the DASH URL was previously passed
            # here by mistake, which broke HLS-only videos).
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        # Only thumbnails that actually carry a URL are kept.
        thumbnails = [{
            'url': image['url'],
            'height': int_or_none(image.get('height')),
            'width': int_or_none(image.get('width')),
        } for image in video_json.get('thumbnails') or [] if image.get('url')]

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': video_json.get('title'),
            'description': video_json.get('description'),
            'view_count': video_json.get('viewCount'),
            'duration': video_json.get('duration'),
            'uploader': try_get(data_json, lambda x: x['channel']['name']),
            'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
            'like_count': data_json.get('likesCount'),
            # Prefer the publication timestamp, falling back to creation time.
            'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
81
82
class CrowdBunkerChannelIE(InfoExtractor):
    """Playlist extractor for crowdbunker.com channel pages (``/@<name>``)."""

    _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'

    _TESTS = [{
        'url': 'https://crowdbunker.com/@Milan_UHRIN',
        'playlist_mincount': 14,
        'info_dict': {
            'id': 'Milan_UHRIN',
        },
    }]

    def _entries(self, id):
        """Yield one url_result per post of channel ``id``.

        Pages are requested one after another; the ``'last'`` value of each
        response is sent as the ``after`` query parameter of the next request,
        and iteration ends once a response carries no ``'last'`` value.
        """
        cursor = None
        page_num = 0
        while True:
            # The first request carries no cursor at all.
            params = {'after': cursor} if cursor else {}
            listing = self._download_json(
                f'https://api.divulg.org/organization/{id}/posts', id,
                headers={'accept': 'application/json, text/plain, */*'},
                query=params, note=f'Downloading Page {page_num}')

            for post in listing.get('items') or []:
                uid = post.get('uid')
                if uid:
                    yield self.url_result(
                        'https://crowdbunker.com/v/%s' % uid,
                        ie=CrowdBunkerIE.ie_key(), video_id=uid)

            cursor = listing.get('last')
            if not cursor:
                return
            page_num += 1

    def _real_extract(self, url):
        """Return a playlist of all posts of the channel named in ``url``."""
        channel_id = self._match_id(url)
        return self.playlist_result(self._entries(channel_id), playlist_id=channel_id)