X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/86e5f3ed2e6e71eb81ea4c9e26288f16119ffd0c..5dbac313ae4e3e8521dfe2e1a6a048a98ff4b4fe:/yt_dlp/extractor/nitter.py diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 251bf444f..7609b4017 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -1,13 +1,14 @@ +import random +import re +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( + determine_ext, parse_count, - unified_timestamp, remove_end, - determine_ext, + unified_timestamp, ) -import re -import random class NitterIE(InfoExtractor): @@ -39,59 +40,99 @@ class NitterIE(InfoExtractor): ) HTTP_INSTANCES = ( - 'nitter.42l.fr', - 'nitter.pussthecat.org', - 'nitter.nixnet.services', + 'nitter.lacontrevoie.fr', 'nitter.fdn.fr', 'nitter.1d4.us', 'nitter.kavin.rocks', 'nitter.unixfox.eu', 'nitter.domain.glass', - 'nitter.eu', 'nitter.namazso.eu', - 'nitter.actionsack.com', 'birdsite.xanny.family', - 'nitter.hu', - 'twitr.gq', 'nitter.moomoo.me', - 'nittereu.moomoo.me', - 'bird.from.tf', + 'bird.trom.tf', 'nitter.it', 'twitter.censors.us', - 'twitter.grimneko.de', - 'nitter.alefvanoon.xyz', - 'n.hyperborea.cloud', - 'nitter.ca', + 'nitter.grimneko.de', 'twitter.076.ne.jp', - 'twitter.mstdn.social', 'nitter.fly.dev', 'notabird.site', 'nitter.weiler.rocks', - 'nitter.silkky.cloud', 'nitter.sethforprivacy.com', - 'nttr.stream', 'nitter.cutelab.space', 'nitter.nl', 'nitter.mint.lgbt', 'nitter.bus-hit.me', - 'fuckthesacklers.network', - 'nitter.govt.land', - 'nitter.datatunnel.xyz', 'nitter.esmailelbob.xyz', 'tw.artemislena.eu', - 'de.nttr.stream', 'nitter.winscloud.net', 'nitter.tiekoetter.com', 'nitter.spaceint.fr', - 'twtr.bch.bar', - 'nitter.exonip.de', - 'nitter.mastodon.pro', - 'nitter.notraxx.ch', - - - # not in the list anymore - 'nitter.skrep.in', - 'nitter.snopyta.org', + 'nitter.privacy.com.de', + 'nitter.poast.org', + 'nitter.bird.froth.zone', + 'nitter.dcs0.hu', + 'twitter.dr460nf1r3.org', + 'nitter.garudalinux.org', + 'twitter.femboy.hu', + 'nitter.cz', + 'nitter.privacydev.net', + 'nitter.evil.site', + 'tweet.lambda.dance', + 'nitter.kylrth.com', + 'nitter.foss.wtf', + 'nitter.priv.pw', + 'nitter.tokhmi.xyz', + 'nitter.catalyst.sx', + 'unofficialbird.com', + 'nitter.projectsegfau.lt', + 'nitter.eu.projectsegfau.lt', + 'singapore.unofficialbird.com', + 'canada.unofficialbird.com', + 'india.unofficialbird.com', + 'nederland.unofficialbird.com', + 'uk.unofficialbird.com', + 'n.l5.ca', + 'nitter.slipfox.xyz', + 'nitter.soopy.moe', + 'nitter.qwik.space', + 'read.whatever.social', + 'nitter.rawbit.ninja', + 'nt.vern.cc', + 'ntr.odyssey346.dev', + 'nitter.ir', + 'nitter.privacytools.io', + 'nitter.sneed.network', + 'n.sneed.network', + 'nitter.manasiwibi.com', + 'nitter.smnz.de', + 'nitter.twei.space', + 'nitter.inpt.fr', + 'nitter.d420.de', + 'nitter.caioalonso.com', + 'nitter.at', + 'nitter.drivet.xyz', + 'nitter.pw', + 'nitter.nicfab.eu', + 'bird.habedieeh.re', + 'nitter.hostux.net', + 'nitter.adminforge.de', + 'nitter.platypush.tech', + 'nitter.mask.sh', + 'nitter.pufe.org', + 'nitter.us.projectsegfau.lt', + 'nitter.arcticfoxes.net', + 't.com.sb', + 'nitter.kling.gg', + 'nitter.ktachibana.party', + 'nitter.riverside.rocks', + 'nitter.girlboss.ceo', + 'nitter.lunar.icu', + 'twitter.moe.ngo', + 'nitter.freedit.eu', + 'ntr.frail.duckdns.org', + 'nitter.librenode.org', + 'n.opnxng.com', + 'nitter.plus.st', ) DEAD_INSTANCES = ( @@ -117,6 +158,32 @@ class NitterIE(InfoExtractor): 'nitter.weaponizedhumiliation.com', 'nitter.vxempire.xyz', 'tweet.lambda.dance', + 'nitter.ca', + 'nitter.42l.fr', + 'nitter.pussthecat.org', + 'nitter.nixnet.services', + 'nitter.eu', + 'nitter.actionsack.com', + 'nitter.hu', + 'twitr.gq', + 'nittereu.moomoo.me', + 'bird.from.tf', + 'twitter.grimneko.de', + 'nitter.alefvanoon.xyz', + 'n.hyperborea.cloud', + 'twitter.mstdn.social', + 'nitter.silkky.cloud', + 'nttr.stream', + 'fuckthesacklers.network', + 'nitter.govt.land', + 'nitter.datatunnel.xyz', + 'de.nttr.stream', + 'twtr.bch.bar', + 'nitter.exonip.de', + 'nitter.mastodon.pro', + 'nitter.notraxx.ch', + 'nitter.skrep.in', + 'nitter.snopyta.org', ) INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES @@ -198,13 +265,33 @@ class NitterIE(InfoExtractor): 'like_count': int, 'repost_count': int, 'comment_count': int, - } - } + }, + }, { # no OpenGraph title + 'url': f'https://{current_instance}/LocalBateman/status/1678455464038735895#m', + 'info_dict': { + 'id': '1678455464038735895', + 'ext': 'mp4', + 'title': 'Your Typical Local Man - Local man, what did Romanians ever do to you?', + 'description': 'Local man, what did Romanians ever do to you?', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Your Typical Local Man', + 'uploader_id': 'LocalBateman', + 'uploader_url': f'https://{current_instance}/LocalBateman', + 'upload_date': '20230710', + 'timestamp': 1689009900, + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + 'params': {'skip_download': 'm3u8'}, + }, ] def _real_extract(self, url): video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') - parsed_url = compat_urlparse.urlparse(url) + parsed_url = urllib.parse.urlparse(url) base_url = f'{parsed_url.scheme}://{parsed_url.netloc}' self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on') @@ -214,7 +301,7 @@ def _real_extract(self, url): if main_tweet_start > 0: webpage = full_webpage[main_tweet_start:] - video_url = '%s%s' % (base_url, self._html_search_regex( + video_url = '{}{}'.format(base_url, self._html_search_regex( r'(?:]+data-url|]+src)="([^"]+)"', webpage, 'video url')) ext = determine_ext(video_url) @@ -223,10 +310,10 @@ def _real_extract(self, url): else: formats = [{ 'url': video_url, - 'ext': ext + 'ext': ext, }] - title = description = self._og_search_description(full_webpage) or self._html_search_regex( + title = description = self._og_search_description(full_webpage, default=None) or self._html_search_regex( r'