[yt-dlp.git] / yt_dlp / extractor / crowdbunker.py

# coding: utf-8
from __future__ import unicode_literals

import itertools

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    try_get,
    unified_strdate,
)


class CrowdBunkerIE(InfoExtractor):
    """Extractor for single CrowdBunker videos (``crowdbunker.com/v/<id>``).

    Metadata is read from the ``api.divulg.org`` post-details endpoint;
    formats are collected from the DASH and HLS manifests that the
    response references.
    """

    _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'

    _TESTS = [{
        'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
        'info_dict': {
            'id': '0z4Kms8pi8I',
            'ext': 'mp4',
            'title': '117) Pass vax et solutions',
            'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
            'view_count': int,
            'duration': 5386,
            'uploader': 'Jérémie Mercier',
            'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
            'like_count': int,
            'upload_date': '20211218',
            'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg'
        },
        'params': {'skip_download': True}
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Raises ``KeyError`` if the API response has no ``video`` object.
        """
        video_id = self._match_id(url)
        data_json = self._download_json(
            f'https://api.divulg.org/post/{video_id}/details', video_id,
            headers={'accept': 'application/json, text/plain, */*'})
        video_json = data_json['video']

        formats, subtitles = [], {}

        # Captions listed directly in the API response.
        for sub in video_json.get('captions') or []:
            sub_url = try_get(sub, lambda x: x['file']['url'])
            if not sub_url:
                continue
            # `or` rather than a .get() default: an explicit null languageCode
            # would otherwise become a None key in the subtitles dict.
            subtitles.setdefault(sub.get('languageCode') or 'fr', []).append({
                'url': sub_url,
            })

        mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
        if mpd_url:
            fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
        if m3u8_url:
            # Must be given the HLS manifest URL; previously the DASH URL was
            # passed here by mistake, so the HLS formats were never extracted.
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        # Thumbnail entries without a URL are skipped.
        thumbnails = [{
            'url': image['url'],
            'height': int_or_none(image.get('height')),
            'width': int_or_none(image.get('width')),
        } for image in video_json.get('thumbnails') or [] if image.get('url')]

        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': video_json.get('title'),
            'description': video_json.get('description'),
            'view_count': video_json.get('viewCount'),
            'duration': video_json.get('duration'),
            'uploader': try_get(data_json, lambda x: x['channel']['name']),
            'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
            'like_count': data_json.get('likesCount'),
            # Fall back to the creation date when no publication date is given.
            'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }


class CrowdBunkerChannelIE(InfoExtractor):
    """Playlist extractor for CrowdBunker channel pages (``crowdbunker.com/@<name>``)."""

    _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'

    _TESTS = [{
        'url': 'https://crowdbunker.com/@Milan_UHRIN',
        'playlist_mincount': 14,
        'info_dict': {
            'id': 'Milan_UHRIN',
        },
    }]

    def _entries(self, id):
        """Yield one ``url_result`` per channel post that carries a ``uid``.

        Pages are requested until the response no longer provides a ``last``
        cursor; that cursor is sent back as the ``after`` query parameter.
        """
        posts_endpoint = f'https://api.divulg.org/organization/{id}/posts'
        cursor = None

        for page_number in itertools.count():
            # The first request carries no cursor at all.
            if cursor:
                query = {'after': cursor}
            else:
                query = {}
            page_json = self._download_json(
                posts_endpoint, id,
                headers={'accept': 'application/json, text/plain, */*'},
                query=query, note=f'Downloading Page {page_number}')

            # Lazily pull the uid of each post; entries without one are dropped.
            post_uids = (post.get('uid') for post in page_json.get('items') or [])
            for post_uid in filter(None, post_uids):
                yield self.url_result(
                    'https://crowdbunker.com/v/%s' % post_uid,
                    ie=CrowdBunkerIE.ie_key(), video_id=post_uid)

            cursor = page_json.get('last')
            if not cursor:
                return

    def _real_extract(self, url):
        """Return a playlist result whose entries are the channel's posts."""
        channel_id = self._match_id(url)
        entries = self._entries(channel_id)
        return self.playlist_result(entries, playlist_id=channel_id)
Commit	Line	Data
8fe514d3 AG	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import itertools
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	int_or_none,
	9	try_get,
	10	unified_strdate,
	11	)
	12
	13
	14	class CrowdBunkerIE(InfoExtractor):
	15	_VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'
	16
	17	_TESTS = [{
	18	'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
	19	'info_dict': {
	20	'id': '0z4Kms8pi8I',
	21	'ext': 'mp4',
	22	'title': '117) Pass vax et solutions',
	23	'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
	24	'view_count': int,
	25	'duration': 5386,
	26	'uploader': 'Jérémie Mercier',
	27	'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
	28	'like_count': int,
	29	'upload_date': '20211218',
	30	'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg'
	31	},
	32	'params': {'skip_download': True}
	33	}]
	34
	35	def _real_extract(self, url):
	36	id = self._match_id(url)
	37	data_json = self._download_json(f'https://api.divulg.org/post/{id}/details',
	38	id, headers={'accept': 'application/json, text/plain, */*'})
	39	video_json = data_json['video']
	40	formats, subtitles = [], {}
	41	for sub in video_json.get('captions') or []:
	42	sub_url = try_get(sub, lambda x: x['file']['url'])
	43	if not sub_url:
	44	continue
	45	subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({
	46	'url': sub_url,
	47	})
	48
	49	mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
	50	if mpd_url:
	51	fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, id)
	52	formats.extend(fmts)
	53	subtitles = self._merge_subtitles(subtitles, subs)
	54	m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
	55	if m3u8_url:
	56	fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, id)
	57	formats.extend(fmts)
	58	subtitles = self._merge_subtitles(subtitles, subs)
	59
	60	thumbnails = [{
	61	'url': image['url'],
	62	'height': int_or_none(image.get('height')),
	63	'width': int_or_none(image.get('width')),
	64	} for image in video_json.get('thumbnails') or [] if image.get('url')]
65
66	self._sort_formats(formats)
67	return {
68	'id': id,
69	'title': video_json.get('title'),
70	'description': video_json.get('description'),
71	'view_count': video_json.get('viewCount'),
72	'duration': video_json.get('duration'),
73	'uploader': try_get(data_json, lambda x: x['channel']['name']),
74	'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
75	'like_count': data_json.get('likesCount'),
76	'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
77	'thumbnails': thumbnails,
78	'formats': formats,
79	'subtitles': subtitles,
80	}
81
82
83	class CrowdBunkerChannelIE(InfoExtractor):
84	_VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'
85
86	_TESTS = [{
87	'url': 'https://crowdbunker.com/@Milan_UHRIN',
88	'playlist_mincount': 14,
89	'info_dict': {
90	'id': 'Milan_UHRIN',
91	},
92	}]
93
94	def _entries(self, id):
95	last = None
96
97	for page in itertools.count():
98	channel_json = self._download_json(
	99	f'https://api.divulg.org/organization/{id}/posts', id, headers={'accept': 'application/json, text/plain, */*'},
100	query={'after': last} if last else {}, note=f'Downloading Page {page}')
101	for item in channel_json.get('items') or []:
102	v_id = item.get('uid')
103	if not v_id:
104	continue
105	yield self.url_result(
106	'https://crowdbunker.com/v/%s' % v_id, ie=CrowdBunkerIE.ie_key(), video_id=v_id)
107	last = channel_json.get('last')
108	if not last:
109	break
110
111	def _real_extract(self, url):
112	id = self._match_id(url)
113	return self.playlist_result(self._entries(id), playlist_id=id)