]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/peertube.py
[peertube] Improve extraction
[yt-dlp.git] / youtube_dl / extractor / peertube.py
CommitLineData
c561b75c
S
1# coding: utf-8
2from __future__ import unicode_literals
3
6bd499e8
S
4import re
5
c561b75c
S
6from .common import InfoExtractor
7from ..compat import compat_str
8from ..utils import (
9 int_or_none,
10 parse_resolution,
ac379fa2 11 str_or_none,
c561b75c
S
12 try_get,
13 unified_timestamp,
3052a30d 14 url_or_none,
c561b75c
S
15 urljoin,
16)
17
18
19class PeerTubeIE(InfoExtractor):
6bd499e8 20 _INSTANCES_RE = r'''(?:
c561b75c 21 # Taken from https://instances.joinpeertube.org/instances
b64045cd
AM
22 peertube\.rainbowswingers\.net|
23 tube\.stanisic\.nl|
24 peer\.suiri\.us|
25 medias\.libox\.fr|
26 videomensoif\.ynh\.fr|
27 peertube\.travelpandas\.eu|
28 peertube\.rachetjay\.fr|
29 peertube\.montecsys\.fr|
30 tube\.eskuero\.me|
31 peer\.tube|
32 peertube\.umeahackerspace\.se|
33 tube\.nx-pod\.de|
34 video\.monsieurbidouille\.fr|
c561b75c 35 tube\.openalgeria\.org|
b64045cd
AM
36 vid\.lelux\.fi|
37 video\.anormallostpod\.ovh|
38 tube\.crapaud-fou\.org|
39 peertube\.stemy\.me|
40 lostpod\.space|
41 exode\.me|
42 peertube\.snargol\.com|
43 vis\.ion\.ovh|
44 videosdulib\.re|
45 v\.mbius\.io|
46 videos\.judrey\.eu|
47 peertube\.osureplayviewer\.xyz|
48 peertube\.mathieufamily\.ovh|
49 www\.videos-libr\.es|
50 fightforinfo\.com|
51 peertube\.fediverse\.ru|
52 peertube\.oiseauroch\.fr|
53 video\.nesven\.eu|
54 v\.bearvideo\.win|
55 video\.qoto\.org|
56 justporn\.cc|
57 video\.vny\.fr|
58 peervideo\.club|
59 tube\.taker\.fr|
60 peertube\.chantierlibre\.org|
61 tube\.ipfixe\.info|
62 tube\.kicou\.info|
63 tube\.dodsorf\.as|
64 videobit\.cc|
65 video\.yukari\.moe|
66 videos\.elbinario\.net|
67 hkvideo\.live|
68 pt\.tux\.tf|
69 www\.hkvideo\.live|
70 FIGHTFORINFO\.com|
71 pt\.765racing\.com|
72 peertube\.gnumeria\.eu\.org|
73 nordenmedia\.com|
74 peertube\.co\.uk|
75 tube\.darfweb\.eu|
76 tube\.kalah-france\.org|
77 0ch\.in|
78 vod\.mochi\.academy|
79 film\.node9\.org|
80 peertube\.hatthieves\.es|
81 video\.fitchfamily\.org|
82 peertube\.ddns\.net|
83 video\.ifuncle\.kr|
84 video\.fdlibre\.eu|
85 tube\.22decembre\.eu|
86 peertube\.harmoniescreatives\.com|
87 tube\.fabrigli\.fr|
88 video\.thedwyers\.co|
89 video\.bruitbruit\.com|
90 peertube\.foxfam\.club|
91 peer\.philoxweb\.be|
92 videos\.bugs\.social|
93 peertube\.malbert\.xyz|
94 peertube\.bilange\.ca|
95 libretube\.net|
96 diytelevision\.com|
97 peertube\.fedilab\.app|
98 libre\.video|
99 video\.mstddntfdn\.online|
100 us\.tv|
101 peertube\.sl-network\.fr|
102 peertube\.dynlinux\.io|
103 peertube\.david\.durieux\.family|
104 peertube\.linuxrocks\.online|
105 peerwatch\.xyz|
106 v\.kretschmann\.social|
107 tube\.otter\.sh|
108 yt\.is\.nota\.live|
109 tube\.dragonpsi\.xyz|
110 peertube\.boneheadmedia\.com|
111 videos\.funkwhale\.audio|
112 watch\.44con\.com|
113 peertube\.gcaillaut\.fr|
114 peertube\.icu|
115 pony\.tube|
116 spacepub\.space|
117 tube\.stbr\.io|
118 v\.mom-gay\.faith|
119 tube\.port0\.xyz|
120 peertube\.simounet\.net|
121 play\.jergefelt\.se|
122 peertube\.zeteo\.me|
123 tube\.danq\.me|
124 peertube\.kerenon\.com|
125 tube\.fab-l3\.org|
126 tube\.calculate\.social|
127 peertube\.mckillop\.org|
128 tube\.netzspielplatz\.de|
129 vod\.ksite\.de|
130 peertube\.laas\.fr|
131 tube\.govital\.net|
132 peertube\.stephenson\.cc|
133 bistule\.nohost\.me|
134 peertube\.kajalinifi\.de|
135 video\.ploud\.jp|
136 video\.omniatv\.com|
137 peertube\.ffs2play\.fr|
138 peertube\.leboulaire\.ovh|
139 peertube\.tronic-studio\.com|
140 peertube\.public\.cat|
141 peertube\.metalbanana\.net|
142 video\.1000i100\.fr|
143 peertube\.alter-nativ-voll\.de|
144 tube\.pasa\.tf|
145 tube\.worldofhauru\.xyz|
146 pt\.kamp\.site|
147 peertube\.teleassist\.fr|
148 videos\.mleduc\.xyz|
149 conf\.tube|
150 media\.privacyinternational\.org|
151 pt\.forty-two\.nl|
152 video\.halle-leaks\.de|
153 video\.grosskopfgames\.de|
154 peertube\.schaeferit\.de|
155 peertube\.jackbot\.fr|
156 tube\.extinctionrebellion\.fr|
157 peertube\.f-si\.org|
158 video\.subak\.ovh|
159 videos\.koweb\.fr|
160 peertube\.zergy\.net|
161 peertube\.roflcopter\.fr|
162 peertube\.floss-marketing-school\.com|
163 vloggers\.social|
164 peertube\.iriseden\.eu|
165 videos\.ubuntu-paris\.org|
166 peertube\.mastodon\.host|
167 armstube\.com|
168 peertube\.s2s\.video|
169 peertube\.lol|
170 tube\.open-plug\.eu|
171 open\.tube|
172 peertube\.ch|
173 peertube\.normandie-libre\.fr|
174 peertube\.slat\.org|
175 video\.lacaveatonton\.ovh|
176 peertube\.uno|
177 peertube\.servebeer\.com|
178 peertube\.fedi\.quebec|
179 tube\.h3z\.jp|
180 tube\.plus200\.com|
181 peertube\.eric\.ovh|
182 tube\.metadocs\.cc|
183 tube\.unmondemeilleur\.eu|
184 gouttedeau\.space|
185 video\.antirep\.net|
186 nrop\.cant\.at|
187 tube\.ksl-bmx\.de|
188 tube\.plaf\.fr|
189 tube\.tchncs\.de|
190 video\.devinberg\.com|
191 hitchtube\.fr|
192 peertube\.kosebamse\.com|
193 yunopeertube\.myddns\.me|
194 peertube\.varney\.fr|
195 peertube\.anon-kenkai\.com|
196 tube\.maiti\.info|
197 tubee\.fr|
198 videos\.dinofly\.com|
199 toobnix\.org|
200 videotape\.me|
201 voca\.tube|
202 video\.heromuster\.com|
203 video\.lemediatv\.fr|
204 video\.up\.edu\.ph|
205 balafon\.video|
206 video\.ivel\.fr|
207 thickrips\.cloud|
208 pt\.laurentkruger\.fr|
209 video\.monarch-pass\.net|
210 peertube\.artica\.center|
211 video\.alternanet\.fr|
212 indymotion\.fr|
213 fanvid\.stopthatimp\.net|
214 video\.farci\.org|
215 v\.lesterpig\.com|
216 video\.okaris\.de|
217 tube\.pawelko\.net|
218 peertube\.mablr\.org|
219 tube\.fede\.re|
220 pytu\.be|
221 evertron\.tv|
222 devtube\.dev-wiki\.de|
223 raptube\.antipub\.org|
224 video\.selea\.se|
225 peertube\.mygaia\.org|
226 video\.oh14\.de|
227 peertube\.livingutopia\.org|
228 peertube\.the-penguin\.de|
229 tube\.thechangebook\.org|
230 tube\.anjara\.eu|
231 pt\.pube\.tk|
232 video\.samedi\.pm|
233 mplayer\.demouliere\.eu|
234 widemus\.de|
235 peertube\.me|
236 peertube\.zapashcanon\.fr|
237 video\.latavernedejohnjohn\.fr|
238 peertube\.pcservice46\.fr|
239 peertube\.mazzonetto\.eu|
240 video\.irem\.univ-paris-diderot\.fr|
241 video\.livecchi\.cloud|
242 alttube\.fr|
243 video\.coop\.tools|
244 video\.cabane-libre\.org|
245 peertube\.openstreetmap\.fr|
246 videos\.alolise\.org|
247 irrsinn\.video|
248 video\.antopie\.org|
249 scitech\.video|
250 tube2\.nemsia\.org|
251 video\.amic37\.fr|
252 peertube\.freeforge\.eu|
253 video\.arbitrarion\.com|
254 video\.datsemultimedia\.com|
255 stoptrackingus\.tv|
256 peertube\.ricostrongxxx\.com|
257 docker\.videos\.lecygnenoir\.info|
258 peertube\.togart\.de|
259 tube\.postblue\.info|
260 videos\.domainepublic\.net|
261 peertube\.cyber-tribal\.com|
262 video\.gresille\.org|
263 peertube\.dsmouse\.net|
264 cinema\.yunohost\.support|
265 tube\.theocevaer\.fr|
266 repro\.video|
267 tube\.4aem\.com|
268 quaziinc\.com|
269 peertube\.metawurst\.space|
270 videos\.wakapo\.com|
271 video\.ploud\.fr|
272 video\.freeradical\.zone|
273 tube\.valinor\.fr|
274 refuznik\.video|
275 pt\.kircheneuenburg\.de|
276 peertube\.asrun\.eu|
277 peertube\.lagob\.fr|
278 videos\.side-ways\.net|
279 91video\.online|
280 video\.valme\.io|
281 video\.taboulisme\.com|
282 videos-libr\.es|
283 tv\.mooh\.fr|
284 nuage\.acostey\.fr|
285 video\.monsieur-a\.fr|
286 peertube\.librelois\.fr|
287 videos\.pair2jeux\.tube|
288 videos\.pueseso\.club|
289 peer\.mathdacloud\.ovh|
290 media\.assassinate-you\.net|
291 vidcommons\.org|
292 ptube\.rousset\.nom\.fr|
293 tube\.cyano\.at|
294 videos\.squat\.net|
295 video\.iphodase\.fr|
296 peertube\.makotoworkshop\.org|
297 peertube\.serveur\.slv-valbonne\.fr|
298 vault\.mle\.party|
299 hostyour\.tv|
300 videos\.hack2g2\.fr|
301 libre\.tube|
302 pire\.artisanlogiciel\.net|
303 videos\.numerique-en-commun\.fr|
304 video\.netsyms\.com|
305 video\.die-partei\.social|
306 video\.writeas\.org|
307 peertube\.swarm\.solvingmaz\.es|
308 tube\.pericoloso\.ovh|
309 watching\.cypherpunk\.observer|
310 videos\.adhocmusic\.com|
311 tube\.rfc1149\.net|
312 peertube\.librelabucm\.org|
313 videos\.numericoop\.fr|
314 peertube\.koehn\.com|
315 peertube\.anarchmusicall\.net|
316 tube\.kampftoast\.de|
317 vid\.y-y\.li|
318 peertube\.xtenz\.xyz|
319 diode\.zone|
320 tube\.egf\.mn|
321 peertube\.nomagic\.uk|
322 visionon\.tv|
323 videos\.koumoul\.com|
324 video\.rastapuls\.com|
325 video\.mantlepro\.com|
326 video\.deadsuperhero\.com|
327 peertube\.musicstudio\.pro|
328 peertube\.we-keys\.fr|
329 artitube\.artifaille\.fr|
330 peertube\.ethernia\.net|
331 tube\.midov\.pl|
332 peertube\.fr|
333 watch\.snoot\.tube|
334 peertube\.donnadieu\.fr|
335 argos\.aquilenet\.fr|
336 tube\.nemsia\.org|
337 tube\.bruniau\.net|
338 videos\.darckoune\.moe|
339 tube\.traydent\.info|
340 dev\.videos\.lecygnenoir\.info|
341 peertube\.nayya\.org|
342 peertube\.live|
343 peertube\.mofgao\.space|
344 video\.lequerrec\.eu|
345 peertube\.amicale\.net|
346 aperi\.tube|
347 tube\.ac-lyon\.fr|
348 video\.lw1\.at|
349 www\.yiny\.org|
350 videos\.pofilo\.fr|
351 tube\.lou\.lt|
352 choob\.h\.etbus\.ch|
353 tube\.hoga\.fr|
354 peertube\.heberge\.fr|
355 video\.obermui\.de|
356 videos\.cloudfrancois\.fr|
357 betamax\.video|
358 video\.typica\.us|
359 tube\.piweb\.be|
360 video\.blender\.org|
361 peertube\.cat|
362 tube\.kdy\.ch|
363 pe\.ertu\.be|
364 peertube\.social|
365 videos\.lescommuns\.org|
366 tv\.datamol\.org|
367 videonaute\.fr|
368 dialup\.express|
c561b75c 369 peertube\.nogafa\.org|
c561b75c
S
370 megatube\.lilomoino\.fr|
371 peertube\.tamanoir\.foucry\.net|
c561b75c
S
372 peertube\.devosi\.org|
373 peertube\.1312\.media|
c561b75c
S
374 tube\.bootlicker\.party|
375 skeptikon\.fr|
c561b75c
S
376 video\.blueline\.mg|
377 tube\.homecomputing\.fr|
c561b75c
S
378 tube\.ouahpiti\.info|
379 video\.tedomum\.net|
380 video\.g3l\.org|
381 fontube\.fr|
382 peertube\.gaialabs\.ch|
c561b75c
S
383 tube\.kher\.nl|
384 peertube\.qtg\.fr|
c561b75c 385 video\.migennes\.net|
c561b75c 386 tube\.p2p\.legal|
c561b75c 387 troll\.tv|
c561b75c
S
388 videos\.iut-orsay\.fr|
389 peertube\.solidev\.net|
c561b75c 390 videos\.cemea\.org|
c561b75c
S
391 video\.passageenseine\.fr|
392 videos\.festivalparminous\.org|
393 peertube\.touhoppai\.moe|
c561b75c 394 sikke\.fi|
c561b75c
S
395 peer\.hostux\.social|
396 share\.tube|
397 peertube\.walkingmountains\.fr|
c561b75c 398 videos\.benpro\.fr|
c561b75c 399 peertube\.parleur\.net|
c561b75c 400 peertube\.heraut\.eu|
c561b75c
S
401 tube\.aquilenet\.fr|
402 peertube\.gegeweb\.eu|
403 framatube\.org|
404 thinkerview\.video|
405 tube\.conferences-gesticulees\.net|
406 peertube\.datagueule\.tv|
407 video\.lqdn\.fr|
c561b75c 408 tube\.mochi\.academy|
c561b75c 409 media\.zat\.im|
c561b75c 410 video\.colibris-outilslibres\.org|
c561b75c
S
411 tube\.svnet\.fr|
412 peertube\.video|
c561b75c
S
413 peertube3\.cpy\.re|
414 peertube2\.cpy\.re|
415 videos\.tcit\.fr|
416 peertube\.cpy\.re
6bd499e8 417 )'''
8b4b400a 418 _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
6bd499e8 419 _VALID_URL = r'''(?x)
8b4b400a
S
420 (?:
421 peertube:(?P<host>[^:]+):|
422 https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
423 )
424 (?P<id>%s)
425 ''' % (_INSTANCES_RE, _UUID_RE)
c561b75c 426 _TESTS = [{
ac379fa2 427 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
428 'md5': '9bed8c0137913e17b86334e5885aacff',
c561b75c 429 'info_dict': {
ac379fa2 430 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
c561b75c 431 'ext': 'mp4',
ac379fa2 432 'title': 'What is PeerTube?',
433 'description': '**[Want to help to translate this video?](https://weblate.framasoft.org/projects/what-is-peertube-video/)**\r\n\r\n**Take back the control of your videos! [#JoinPeertube](https://joinpeertube.org)**\r\n*A decentralized video hosting network, based on free/libre software!*\r\n\r\n**Animation Produced by:** [LILA](https://libreart.info) - [ZeMarmot Team](https://film.zemarmot.net)\r\n*Directed by* Aryeom\r\n*Assistant* Jehan\r\n**Licence**: [CC-By-SA 4.0](https://creativecommons.org/licenses/by-sa/4.0/)\r\n\r\n**Sponsored by** [Framasoft](https://framasoft.org)\r\n\r\n**Music**: [Red Step Forward](http://play.dogmazic.net/song.php?song_id=52491) - CC-By Ken Bushima\r\n\r\n**Movie Clip**: [Caminades 3: Llamigos](http://www.caminandes.com/) CC-By Blender Institute\r\n\r\n**Video sources**: https://gitlab.gnome.org/Jehan/what-is-peertube/',
c561b75c 434 'thumbnail': r're:https?://.*\.(?:jpg|png)',
ac379fa2 435 'timestamp': 1538391166,
436 'upload_date': '20181001',
437 'uploader': 'Framasoft',
438 'uploader_id': '3',
439 'uploader_url': 'https://framatube.org/accounts/framasoft',
440 'channel': 'Les vidéos de Framasoft',
441 'channel_id': '2',
442 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
443 'language': 'en',
444 'license': 'Attribution - Share Alike',
445 'duration': 113,
c561b75c
S
446 'view_count': int,
447 'like_count': int,
448 'dislike_count': int,
ac379fa2 449 'tags': ['framasoft', 'peertube'],
450 'categories': ['Science & Technology'],
c561b75c
S
451 }
452 }, {
453 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
454 'only_matching': True,
455 }, {
456 # nsfw
457 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
458 'only_matching': True,
f2fc63a5
S
459 }, {
460 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
461 'only_matching': True,
462 }, {
463 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
464 'only_matching': True,
8b4b400a
S
465 }, {
466 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
467 'only_matching': True,
c561b75c
S
468 }]
469
@staticmethod
def _extract_peertube_url(webpage, source_url):
    """Return a ``peertube:<host>:<uuid>`` URL for a PeerTube watch/embed page.

    `source_url` must look like ``http(s)://<host>/videos/watch|embed/<uuid>``
    and `webpage` must contain one of the known PeerTube marker strings;
    otherwise None is returned.
    """
    page_url_re = (
        r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
        % PeerTubeIE._UUID_RE)
    url_match = re.match(page_url_re, source_url)
    if not url_match:
        return None

    # Strings that identify the page as being served by a PeerTube instance.
    peertube_markers = (
        '<title>PeerTube<',
        'There will be other non JS-based clients to access PeerTube',
        '>We are sorry but it seems that PeerTube is not compatible with your web browser.<',
    )
    for marker in peertube_markers:
        if marker in webpage:
            return 'peertube:%s:%s' % url_match.group('host', 'id')
    return None
480
@staticmethod
def _extract_urls(webpage, source_url):
    """Collect PeerTube video URLs referenced by `webpage`.

    Embed iframes pointing at a known instance take precedence. When there
    are none, the page itself is probed via `_extract_peertube_url` and, if
    recognised, returned as the single entry. Returns a (possibly empty)
    list.
    """
    embed_re = (
        r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
        % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE))
    embedded = re.findall(embed_re, webpage)
    if embedded:
        return embedded

    page_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
    return [page_url] if page_url else embedded
6bd499e8 491
def _get_subtitles(self, host, video_id):
    """Fetch the caption list of a video and map it to a subtitles dict.

    Queries ``https://<host>/api/v1/videos/<video_id>/captions`` (non-fatal).
    Returns None when the response is not a JSON object, otherwise a dict
    mapping language id to a list of ``{'url': ...}`` entries. Malformed
    or incomplete caption entries are skipped.
    """
    video_captions = self._download_json(
        'https://%s/api/v1/videos/%s/captions' % (host, video_id), video_id, fatal=False)
    if not isinstance(video_captions, dict):
        return None

    subtitles = {}
    # 'data' may be absent or null; iterate over nothing instead of raising.
    for entry in video_captions.get('data') or []:
        if not isinstance(entry, dict):
            continue
        language_id = try_get(entry, lambda x: x['language']['id'], compat_str)
        caption_path = str_or_none(entry.get('captionPath'))
        if not language_id or not caption_path:
            continue
        caption_url = urljoin('https://%s' % host, caption_path)
        # Do not emit an entry without a usable URL.
        if not caption_url:
            continue
        subtitles.setdefault(language_id, []).append({
            'url': caption_url,
        })
    return subtitles
508
def _real_extract(self, url):
    """Build the info dict for a single PeerTube video.

    `url` is matched against `_VALID_URL`, i.e. it is either an
    ``http(s)://<instance>/...`` URL or the internal
    ``peertube:<host>:<uuid>`` form. Video metadata is read from
    ``https://<host>/api/v1/videos/<uuid>``; the description and captions
    come from separate, non-fatal requests.

    Returns a dict with id, title, formats, subtitles and optional
    metadata. A metadata response without a 'name' key raises KeyError.
    """
    mobj = re.match(self._VALID_URL, url)
    host = mobj.group('host') or mobj.group('host_2')
    video_id = mobj.group('id')
    # Relative API paths are resolved against the instance itself rather
    # than `url`, because `url` may be the internal peertube:<host>:<id>
    # form, which is not a usable base URL.
    base_url = 'https://%s' % host

    video = self._download_json(
        'https://%s/api/v1/videos/%s' % (host, video_id), video_id)

    title = video['name']

    formats = []
    # Tolerate a missing or null 'files' list; `formats` then stays empty
    # and is handed to `_sort_formats` as is.
    for file_ in video.get('files') or []:
        if not isinstance(file_, dict):
            continue
        file_url = url_or_none(file_.get('fileUrl'))
        if not file_url:
            continue
        file_size = int_or_none(file_.get('size'))
        format_id = try_get(
            file_, lambda x: x['resolution']['label'], compat_str)
        f = parse_resolution(format_id)
        f.update({
            'url': file_url,
            'format_id': format_id,
            'filesize': file_size,
        })
        formats.append(f)
    self._sort_formats(formats)

    video_description = self._download_json(
        'https://%s/api/v1/videos/%s/description' % (host, video_id), video_id, fatal=False)

    description = None
    if isinstance(video_description, dict):
        description = str_or_none(video_description.get('description'))

    subtitles = self.extract_subtitles(host, video_id)

    def data(section, field, type_):
        # Safe nested lookup: video[section][field] if it has type `type_`.
        return try_get(video, lambda x: x[section][field], type_)

    def account_data(field, type_):
        return data('account', field, type_)

    def channel_data(field, type_):
        return data('channel', field, type_)

    category = data('category', 'label', compat_str)
    categories = [category] if category else None

    nsfw = video.get('nsfw')
    # Type check, not identity with the `bool` type object: the flag is a
    # bool *value*, so an `is bool` comparison can never match.
    if isinstance(nsfw, bool):
        age_limit = 18 if nsfw else 0
    else:
        age_limit = None

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'thumbnail': urljoin(base_url, video.get('thumbnailPath')),
        'timestamp': unified_timestamp(video.get('publishedAt')),
        'uploader': account_data('displayName', compat_str),
        # str_or_none keeps a missing id as None instead of the text 'None'.
        'uploader_id': str_or_none(account_data('id', int)),
        'uploader_url': url_or_none(account_data('url', compat_str)),
        'channel': channel_data('displayName', compat_str),
        'channel_id': str_or_none(channel_data('id', int)),
        'channel_url': url_or_none(channel_data('url', compat_str)),
        'language': data('language', 'id', compat_str),
        'license': data('licence', 'label', compat_str),
        'duration': int_or_none(video.get('duration')),
        'view_count': int_or_none(video.get('views')),
        'like_count': int_or_none(video.get('likes')),
        'dislike_count': int_or_none(video.get('dislikes')),
        'age_limit': age_limit,
        'tags': try_get(video, lambda x: x['tags'], list),
        'categories': categories,
        'formats': formats,
        'subtitles': subtitles,
    }