jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/banbye.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / banbye.py
1 import math
2 import urllib.parse
3
4 from .common import InfoExtractor
5 from ..utils import (
6 InAdvancePagedList,
7 format_field,
8 traverse_obj,
9 unified_timestamp,
10 )
11
12
class BanByeBaseIE(InfoExtractor):
    """Shared endpoints and playlist helpers for the BanBye extractors."""

    # Base URLs used to build API requests, CDN media links and watch-page links.
    _API_BASE = 'https://api.banbye.com'
    _CDN_BASE = 'https://cdn.banbye.com'
    _VIDEO_BASE = 'https://banbye.com/watch'

    @staticmethod
    def _extract_playlist_id(url, param='playlist'):
        """Return the first value of query parameter *param* in *url*.

        Returns None when the parameter is not present in the query string.
        """
        query_string = urllib.parse.urlparse(url).query
        query_params = urllib.parse.parse_qs(query_string)
        if param not in query_params:
            return None
        return query_params[param][0]

    def _extract_playlist(self, playlist_id):
        """Download playlist metadata and return a playlist result.

        Each ID listed under ``videoIds`` in the API response becomes a URL
        entry pointing at the watch page, to be handled by BanByeIE. The
        playlist title is taken from the optional ``name`` field.
        """
        playlist = self._download_json(
            f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
        entries = [
            self.url_result(f'{self._VIDEO_BASE}/{video_id}', BanByeIE)
            for video_id in playlist['videoIds']
        ]
        return self.playlist_result(entries, playlist_id, playlist.get('name'))
28
29
class BanByeIE(BanByeBaseIE):
    """Extractor for BanBye watch pages (optionally carrying a ``playlistId``)."""

    _VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
        'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
        'info_dict': {
            'id': 'v_ytfmvkVYLE8T',
            'ext': 'mp4',
            'title': 'md5:5ec098f88a0d796f987648de6322ba0f',
            'description': 'md5:4d94836e73396bc18ef1fa0f43e5a63a',
            'uploader': 'wRealu24',
            'channel_id': 'ch_wrealu24',
            'channel_url': 'https://banbye.com/channel/ch_wrealu24',
            'timestamp': 1647604800,
            'upload_date': '20220318',
            'duration': 1931,
            'thumbnail': r're:https?://.*\.webp',
            'tags': 'count:5',
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'https://banbye.com/watch/v_2JjQtqjKUE_F?playlistId=p_Ld82N6gBw_OJ',
        'info_dict': {
            'title': 'Krzysztof Karoń',
            'id': 'p_Ld82N6gBw_OJ',
        },
        'playlist_mincount': 9,
    }, {
        'url': 'https://banbye.com/watch/v_kb6_o1Kyq-CD',
        'info_dict': {
            'id': 'v_kb6_o1Kyq-CD',
            'ext': 'mp4',
            'title': 'Co tak naprawdę dzieje się we Francji?! Czy Warszawa a potem cała Polska będzie drugim Paryżem?!🤔🇵🇱',
            'description': 'md5:82be4c0e13eae8ea1ca8b9f2e07226a8',
            'uploader': 'Marcin Rola - MOIM ZDANIEM!🇵🇱',
            'channel_id': 'ch_QgWnHvDG2fo5',
            'channel_url': 'https://banbye.com/channel/ch_QgWnHvDG2fo5',
            'duration': 597,
            'timestamp': 1688642656,
            'upload_date': '20230706',
            'thumbnail': 'https://cdn.banbye.com/video/v_kb6_o1Kyq-CD/96.webp',
            'tags': ['Paryż', 'Francja', 'Polska', 'Imigranci', 'Morawiecki', 'Tusk'],
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a video, or a playlist result.

        When ``_yes_playlist`` returns a truthy value for the URL's
        ``playlistId`` query parameter, the playlist is extracted instead of
        the single video.
        """
        video_id = self._match_id(url)
        playlist_id = self._extract_playlist_id(url, 'playlistId')

        if self._yes_playlist(playlist_id, video_id):
            return self._extract_playlist(playlist_id)

        video = self._download_json(f'{self._API_BASE}/videos/{video_id}', video_id)

        # Thumbnails and media files share the same per-video CDN prefix.
        media_base = f'{self._CDN_BASE}/video/{video_id}'

        # Thumbnail URLs are built from a fixed list of sizes, not from the API response.
        thumbnails = [
            {'id': f'{size}p', 'url': f'{media_base}/{size}.webp'}
            for size in (48, 96, 144, 240, 512, 1080)
        ]

        # One direct MP4 format per entry in the response's 'quality' list.
        formats = [
            {
                'format_id': f'http-{level}p',
                'quality': level,
                'url': f'{media_base}/{level}.mp4',
            }
            for level in video['quality']
        ]

        return {
            'id': video_id,
            'title': video.get('title'),
            'description': video.get('desc'),
            'uploader': traverse_obj(video, ('channel', 'name')),
            'channel_id': video.get('channelId'),
            'channel_url': format_field(video, 'channelId', 'https://banbye.com/channel/%s'),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'duration': video.get('duration'),
            'tags': video.get('tags'),
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': video.get('likes'),
            'dislike_count': video.get('dislikes'),
            'view_count': video.get('views'),
            'comment_count': video.get('commentCount'),
        }
117
118
class BanByeChannelIE(BanByeBaseIE):
    """Extractor for BanBye channel pages (optionally carrying a ``playlist``)."""

    _VALID_URL = r'https?://(?:www\.)?banbye\.com/(?:en/)?channel/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://banbye.com/channel/ch_wrealu24',
        'info_dict': {
            'title': 'wRealu24',
            'id': 'ch_wrealu24',
            'description': 'md5:da54e48416b74dfdde20a04867c0c2f6',
        },
        'playlist_mincount': 791,
    }, {
        'url': 'https://banbye.com/channel/ch_wrealu24?playlist=p_Ld82N6gBw_OJ',
        'info_dict': {
            'title': 'Krzysztof Karoń',
            'id': 'p_Ld82N6gBw_OJ',
        },
        'playlist_count': 9,
    }]
    # Number of videos requested per API page.
    _PAGE_SIZE = 100

    def _real_extract(self, url):
        """Return a playlist result for a channel, or for a named playlist.

        If the URL has a ``playlist`` query parameter, that playlist is
        extracted. Otherwise the channel's videos are listed page by page
        through the ``/videos`` endpoint.
        """
        channel_id = self._match_id(url)
        playlist_id = self._extract_playlist_id(url)
        if playlist_id:
            return self._extract_playlist(playlist_id)

        def fetch_page(page_index):
            """Download one page of the channel listing as URL entries."""
            listing = self._download_json(
                f'{self._API_BASE}/videos', channel_id,
                query={
                    'channelId': channel_id,
                    'sort': 'new',
                    'limit': self._PAGE_SIZE,
                    'offset': page_index * self._PAGE_SIZE,
                },
                note=f'Downloading page {page_index + 1}')

            page_entries = []
            for item in listing['items']:
                watch_url = f"{self._VIDEO_BASE}/{item['_id']}"
                page_entries.append(self.url_result(watch_url, BanByeIE))
            return page_entries

        channel = self._download_json(
            f'{self._API_BASE}/channels/{channel_id}', channel_id)

        # The page count is derived up front from the channel's reported video count.
        page_count = math.ceil(channel['videoCount'] / self._PAGE_SIZE)
        entries = InAdvancePagedList(fetch_page, page_count, self._PAGE_SIZE)

        return self.playlist_result(
            entries, channel_id, channel.get('name'), channel.get('description'))