[yt-dlp.git] / yt_dlp / extractor / nitter.py

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    parse_count,
    unified_timestamp,
    remove_end,
    determine_ext,
)
import re
import random


class NitterIE(InfoExtractor):
    """Extract the video attached to a tweet from a Nitter instance status page.

    A URL is accepted when its host is one of the instances listed below and
    its path has the form ``/<uploader_id>/status/<numeric id>``.
    """
    # Taken from https://github.com/zedeus/nitter/wiki/Instances

    # Hosts reachable only through overlay networks (.onion / .i2p addresses)
    NON_HTTP_INSTANCES = (
        '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
        'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
        'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
        'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
        'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
        'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
        '26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
        'vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad.onion',
        'iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd.onion',
        'erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad.onion',
        'ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd.onion',
        'jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid.onion',
        'nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad.onion',
        'nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd.onion',
        'nitter.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion',
        'ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad.onion',
        'ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd.onion',

        'nitter.i2p',
        'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',

        'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
    )

    # Regular hosts; the test URLs below are built from a random entry of this tuple
    HTTP_INSTANCES = (
        'nitter.42l.fr',
        'nitter.pussthecat.org',
        'nitter.nixnet.services',
        'nitter.fdn.fr',
        'nitter.1d4.us',
        'nitter.kavin.rocks',
        'nitter.unixfox.eu',
        'nitter.domain.glass',
        'nitter.eu',
        'nitter.namazso.eu',
        'nitter.actionsack.com',
        'birdsite.xanny.family',
        'nitter.hu',
        'twitr.gq',
        'nitter.moomoo.me',
        'nittereu.moomoo.me',
        'bird.from.tf',
        'nitter.it',
        'twitter.censors.us',
        'twitter.grimneko.de',
        'nitter.alefvanoon.xyz',
        'n.hyperborea.cloud',
        'nitter.ca',
        'twitter.076.ne.jp',
        'twitter.mstdn.social',
        'nitter.fly.dev',
        'notabird.site',
        'nitter.weiler.rocks',
        'nitter.silkky.cloud',
        'nitter.sethforprivacy.com',
        'nttr.stream',
        'nitter.cutelab.space',
        'nitter.nl',
        'nitter.mint.lgbt',
        'nitter.bus-hit.me',
        'fuckthesacklers.network',
        'nitter.govt.land',
        'nitter.datatunnel.xyz',
        'nitter.esmailelbob.xyz',
        'tw.artemislena.eu',
        'de.nttr.stream',
        'nitter.winscloud.net',
        'nitter.tiekoetter.com',
        'nitter.spaceint.fr',
        'twtr.bch.bar',
        'nitter.exonip.de',
        'nitter.mastodon.pro',
        'nitter.notraxx.ch',

        # not in the list anymore
        'nitter.skrep.in',
        'nitter.snopyta.org',
    )

    # Hosts that are never chosen for the tests, but are still part of
    # INSTANCES so that their URLs keep matching _VALID_URL
    DEAD_INSTANCES = (
        # maintenance
        'nitter.ethibox.fr',

        # official, rate limited
        'nitter.net',
        # offline
        'is-nitter.resolv.ee',
        'lu-nitter.resolv.ee',
        'nitter.13ad.de',
        'nitter.40two.app',
        'nitter.cattube.org',
        'nitter.cc',
        'nitter.dark.fail',
        'nitter.himiko.cloud',
        'nitter.koyu.space',
        'nitter.mailstation.de',
        'nitter.mastodont.cat',
        'nitter.tedomum.net',
        'nitter.tokhmi.xyz',
        'nitter.weaponizedhumiliation.com',
        'nitter.vxempire.xyz',
        'tweet.lambda.dance',
    )

    INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES

    # Alternation of every known host, regex-escaped so the dots match literally
    _INSTANCES_RE = f'(?:{"|".join(map(re.escape, INSTANCES))})'
    _VALID_URL = fr'https?://{_INSTANCES_RE}/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?'
    # Chosen once, when the class body is executed; every test URL uses this host
    current_instance = random.choice(HTTP_INSTANCES)

    _TESTS = [
        {
            # GIF (wrapped in mp4)
            'url': f'https://{current_instance}/firefox/status/1314279897502629888#m',
            'info_dict': {
                'id': '1314279897502629888',
                'ext': 'mp4',
                'title': 'md5:7890a9277da4639ab624dd899424c5d8',
                'description': 'md5:5fea96a4d3716c350f8b95b21b3111fe',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Firefox 🔥',
                'uploader_id': 'firefox',
                'uploader_url': f'https://{current_instance}/firefox',
                'upload_date': '20201008',
                'timestamp': 1602183720,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            },
        }, {  # normal video
            'url': f'https://{current_instance}/Le___Doc/status/1299715685392756737#m',
            'info_dict': {
                'id': '1299715685392756737',
                'ext': 'mp4',
                'title': 're:^.* - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
                'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 're:^Le *Doc',
                'uploader_id': 'Le___Doc',
                'uploader_url': f'https://{current_instance}/Le___Doc',
                'upload_date': '20200829',
                'timestamp': 1598711340,
                'view_count': int,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            },
        }, {  # video embed in a "Streaming Political Ads" box
            'url': f'https://{current_instance}/mozilla/status/1321147074491092994#m',
            'info_dict': {
                'id': '1321147074491092994',
                'ext': 'mp4',
                'title': 'md5:8290664aabb43b9189145c008386bf12',
                'description': 'md5:9cf2762d49674bc416a191a689fb2aaa',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Mozilla',
                'uploader_id': 'mozilla',
                'uploader_url': f'https://{current_instance}/mozilla',
                'upload_date': '20201027',
                'timestamp': 1603820940,
                'view_count': int,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            },
            'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
        }, {  # not the first tweet but main-tweet
            'url': f'https://{current_instance}/firefox/status/1354848277481414657#m',
            'info_dict': {
                'id': '1354848277481414657',
                'ext': 'mp4',
                'title': 'md5:bef647f03bd1c6b15b687ea70dfc9700',
                'description': 'md5:5efba25e2f9dac85ebcd21160cb4341f',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Firefox 🔥',
                'uploader_id': 'firefox',
                'uploader_url': f'https://{current_instance}/firefox',
                'upload_date': '20210128',
                'timestamp': 1611855960,
                'view_count': int,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            }
        }
    ]

    def _real_extract(self, url):
        """Download the status page at *url* and build the info dict for its video.

        The returned dict carries ``id``, ``title``, ``description``,
        ``uploader``, ``uploader_id``, ``uploader_url``, ``timestamp``,
        ``formats``, ``thumbnail``, ``thumbnails`` and the ``view``/``like``/
        ``repost``/``comment`` ``*_count`` fields.

        The video URL lookup is the only search made without ``fatal=False`` or
        a default, so it is presumably the one that aborts extraction when the
        page has no video -- confirm against ``InfoExtractor._html_search_regex``.
        """
        video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
        parsed_url = compat_urlparse.urlparse(url)
        base_url = f'{parsed_url.scheme}://{parsed_url.netloc}'

        # NOTE(review): the cookie presumably asks the instance to expose HLS
        # playback URLs in the markup -- confirm against Nitter's preferences.
        self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
        full_webpage = webpage = self._download_webpage(url, video_id)

        # Search from the main tweet onwards, so that tweets rendered before it
        # on the page are not picked up. str.find() returns -1 when absent.
        main_tweet_start = full_webpage.find('class="main-tweet"')
        if main_tweet_start != -1:
            webpage = full_webpage[main_tweet_start:]

        # The matched attribute value is appended directly to the instance base URL
        video_path = self._html_search_regex(
            r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
        video_url = f'{base_url}{video_path}'
        ext = determine_ext(video_url)

        if ext == 'unknown_video':
            # No recognisable extension: handled as an m3u8 (HLS) manifest
            formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
        else:
            formats = [{
                'url': video_url,
                'ext': ext
            }]

        title = description = self._og_search_description(full_webpage) or self._html_search_regex(
            r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)

        # Prefer the handle shown on the page; fall back to the one from the URL
        uploader_id = self._html_search_regex(
            r'<a class="username"[^>]+title="@([^"]+)"', webpage, 'uploader id', fatal=False) or uploader_id

        uploader = self._html_search_regex(
            r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
        # Only prefix when there is a title to prefix; otherwise the result
        # would be the literal text "<uploader> - None".
        if uploader and title:
            title = f'{uploader} - {title}'

        counts = {}
        for field, icon in (('view', 'play'), ('like', 'heart'), ('repost', 'retweet'), ('comment', 'comment')):
            raw_count = self._html_search_regex(
                fr'<span[^>]+class="icon-{icon}[^>]*></span>([^<]*)</div>',
                webpage, f'{field} count', fatal=False)
            # An icon followed by no text is reported as a count of zero
            counts[f'{field}_count'] = 0 if raw_count == '' else parse_count(raw_count)

        thumbnail = self._html_search_meta('og:image', full_webpage, 'thumbnail url')
        if not thumbnail:
            poster = self._html_search_regex(
                r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
            # Build a URL only when a poster was found: formatting None into
            # the string would yield a bogus "<base_url>None" thumbnail.
            thumbnail = remove_end(f'{base_url}{poster}', '%3Asmall') if poster else None

        # One variant per size suffix; nothing to derive without a base thumbnail
        thumbnails = [
            {'id': size, 'url': f'{thumbnail}%3A{size}'}
            for size in ('thumb', 'small', 'large', 'medium', 'orig')
        ] if thumbnail else []

        # The middle dot is dropped from the date text before it is parsed
        date = self._html_search_regex(
            r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"',
            webpage, 'upload date', default='').replace('·', '')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'timestamp': unified_timestamp(date),
            'uploader_id': uploader_id,
            'uploader_url': f'{base_url}/{uploader_id}',
            'formats': formats,
            'thumbnails': thumbnails,
            'thumbnail': thumbnail,
            **counts,
        }
Commit	Line	Data
bb8a73a0	1	from .common import InfoExtractor
	2	from ..compat import compat_urlparse
	3	from ..utils import (
	4	parse_count,
bb8a73a0	5	unified_timestamp,
	6	remove_end,
	7	determine_ext,
	8	)
	9	import re
a4ddaf23	10	import random
bb8a73a0	11
	12
	13	class NitterIE(InfoExtractor):
	14	# Taken from https://github.com/zedeus/nitter/wiki/Instances
a4ddaf23	15
	16	NON_HTTP_INSTANCES = (
	17	'3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
	18	'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
	19	'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
	20	'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
	21	'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
	22	'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
	23	'26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
510809f1	24	'vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad.onion',
	25	'iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd.onion',
	26	'erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad.onion',
	27	'ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd.onion',
	28	'jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid.onion',
	29	'nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad.onion',
	30	'nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd.onion',
	31	'nitter.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion',
	32	'ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad.onion',
	33	'ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd.onion',
a4ddaf23	34
	35	'nitter.i2p',
	36	'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',
	37
	38	'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
	39	)
	40
	41	HTTP_INSTANCES = (
	42	'nitter.42l.fr',
	43	'nitter.pussthecat.org',
	44	'nitter.nixnet.services',
a4ddaf23	45	'nitter.fdn.fr',
	46	'nitter.1d4.us',
	47	'nitter.kavin.rocks',
a4ddaf23	48	'nitter.unixfox.eu',
a4ddaf23	49	'nitter.domain.glass',
a4ddaf23	50	'nitter.eu',
a4ddaf23	51	'nitter.namazso.eu',
a4ddaf23	52	'nitter.actionsack.com',
a4ddaf23	53	'birdsite.xanny.family',
510809f1	54	'nitter.hu',
	55	'twitr.gq',
	56	'nitter.moomoo.me',
	57	'nittereu.moomoo.me',
	58	'bird.from.tf',
	59	'nitter.it',
	60	'twitter.censors.us',
	61	'twitter.grimneko.de',
	62	'nitter.alefvanoon.xyz',
	63	'n.hyperborea.cloud',
	64	'nitter.ca',
	65	'twitter.076.ne.jp',
	66	'twitter.mstdn.social',
	67	'nitter.fly.dev',
	68	'notabird.site',
	69	'nitter.weiler.rocks',
	70	'nitter.silkky.cloud',
	71	'nitter.sethforprivacy.com',
	72	'nttr.stream',
	73	'nitter.cutelab.space',
	74	'nitter.nl',
	75	'nitter.mint.lgbt',
	76	'nitter.bus-hit.me',
	77	'fuckthesacklers.network',
	78	'nitter.govt.land',
	79	'nitter.datatunnel.xyz',
	80	'nitter.esmailelbob.xyz',
	81	'tw.artemislena.eu',
	82	'de.nttr.stream',
	83	'nitter.winscloud.net',
	84	'nitter.tiekoetter.com',
	85	'nitter.spaceint.fr',
	86	'twtr.bch.bar',
	87	'nitter.exonip.de',
	88	'nitter.mastodon.pro',
	89	'nitter.notraxx.ch',
	90
a4ddaf23	91
a4ddaf23	92	# not in the list anymore
510809f1	93	'nitter.skrep.in',
a4ddaf23	94	'nitter.snopyta.org',
	95	)
	96
	97	DEAD_INSTANCES = (
	98	# maintenance
	99	'nitter.ethibox.fr',
	100
	101	# official, rate limited
	102	'nitter.net',
	103	# offline
510809f1	104	'is-nitter.resolv.ee',
510809f1	105	'lu-nitter.resolv.ee',
a4ddaf23	106	'nitter.13ad.de',
510809f1	107	'nitter.40two.app',
	108	'nitter.cattube.org',
	109	'nitter.cc',
	110	'nitter.dark.fail',
	111	'nitter.himiko.cloud',
	112	'nitter.koyu.space',
	113	'nitter.mailstation.de',
	114	'nitter.mastodont.cat',
	115	'nitter.tedomum.net',
	116	'nitter.tokhmi.xyz',
a4ddaf23	117	'nitter.weaponizedhumiliation.com',
510809f1	118	'nitter.vxempire.xyz',
510809f1	119	'tweet.lambda.dance',
a4ddaf23	120	)
	121
	122	INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
bb8a73a0	123
510809f1	124	_INSTANCES_RE = f'(?:{"\|".join(map(re.escape, INSTANCES))})'
510809f1	125	_VALID_URL = fr'https?://{_INSTANCES_RE}/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?'
a4ddaf23	126	current_instance = random.choice(HTTP_INSTANCES)
a4ddaf23	127
bb8a73a0	128	_TESTS = [
	129	{
	130	# GIF (wrapped in mp4)
510809f1	131	'url': f'https://{current_instance}/firefox/status/1314279897502629888#m',
bb8a73a0	132	'info_dict': {
	133	'id': '1314279897502629888',
	134	'ext': 'mp4',
510809f1	135	'title': 'md5:7890a9277da4639ab624dd899424c5d8',
510809f1	136	'description': 'md5:5fea96a4d3716c350f8b95b21b3111fe',
bb8a73a0	137	'thumbnail': r're:^https?://.*\.jpg$',
	138	'uploader': 'Firefox 🔥',
	139	'uploader_id': 'firefox',
510809f1	140	'uploader_url': f'https://{current_instance}/firefox',
bb8a73a0	141	'upload_date': '20201008',
bb8a73a0	142	'timestamp': 1602183720,
510809f1	143	'like_count': int,
	144	'repost_count': int,
	145	'comment_count': int,
bb8a73a0	146	},
bb8a73a0	147	}, { # normal video
510809f1	148	'url': f'https://{current_instance}/Le___Doc/status/1299715685392756737#m',
bb8a73a0	149	'info_dict': {
	150	'id': '1299715685392756737',
	151	'ext': 'mp4',
510809f1	152	'title': 're:^.* - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
a4ddaf23	153	'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
bb8a73a0	154	'thumbnail': r're:^https?://.*\.jpg$',
510809f1	155	'uploader': 're:^Le *Doc',
bb8a73a0	156	'uploader_id': 'Le___Doc',
510809f1	157	'uploader_url': f'https://{current_instance}/Le___Doc',
bb8a73a0	158	'upload_date': '20200829',
510809f1	159	'timestamp': 1598711340,
bb8a73a0	160	'view_count': int,
	161	'like_count': int,
	162	'repost_count': int,
	163	'comment_count': int,
	164	},
	165	}, { # video embed in a "Streaming Political Ads" box
510809f1	166	'url': f'https://{current_instance}/mozilla/status/1321147074491092994#m',
bb8a73a0	167	'info_dict': {
	168	'id': '1321147074491092994',
	169	'ext': 'mp4',
510809f1	170	'title': 'md5:8290664aabb43b9189145c008386bf12',
510809f1	171	'description': 'md5:9cf2762d49674bc416a191a689fb2aaa',
bb8a73a0	172	'thumbnail': r're:^https?://.*\.jpg$',
	173	'uploader': 'Mozilla',
	174	'uploader_id': 'mozilla',
510809f1	175	'uploader_url': f'https://{current_instance}/mozilla',
bb8a73a0	176	'upload_date': '20201027',
510809f1	177	'timestamp': 1603820940,
	178	'view_count': int,
	179	'like_count': int,
	180	'repost_count': int,
	181	'comment_count': int,
bb8a73a0	182	},
510809f1	183	'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
a4ddaf23	184	}, { # not the first tweet but main-tweet
510809f1	185	'url': f'https://{current_instance}/firefox/status/1354848277481414657#m',
a4ddaf23	186	'info_dict': {
510809f1	187	'id': '1354848277481414657',
a4ddaf23	188	'ext': 'mp4',
510809f1	189	'title': 'md5:bef647f03bd1c6b15b687ea70dfc9700',
510809f1	190	'description': 'md5:5efba25e2f9dac85ebcd21160cb4341f',
a4ddaf23	191	'thumbnail': r're:^https?://.*\.jpg$',
510809f1	192	'uploader': 'Firefox 🔥',
	193	'uploader_id': 'firefox',
	194	'uploader_url': f'https://{current_instance}/firefox',
	195	'upload_date': '20210128',
	196	'timestamp': 1611855960,
	197	'view_count': int,
	198	'like_count': int,
	199	'repost_count': int,
	200	'comment_count': int,
a4ddaf23	201	}
a4ddaf23	202	}
bb8a73a0	203	]
	204
	205	def _real_extract(self, url):
510809f1	206	video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
bb8a73a0	207	parsed_url = compat_urlparse.urlparse(url)
510809f1	208	base_url = f'{parsed_url.scheme}://{parsed_url.netloc}'
bb8a73a0	209
bb8a73a0	210	self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
510809f1	211	full_webpage = webpage = self._download_webpage(url, video_id)
a4ddaf23	212
	213	main_tweet_start = full_webpage.find('class="main-tweet"')
	214	if main_tweet_start > 0:
	215	webpage = full_webpage[main_tweet_start:]
bb8a73a0	216
510809f1	217	video_url = '%s%s' % (base_url, self._html_search_regex(
510809f1	218	r'(?:<video[^>]+data-url\|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
bb8a73a0	219	ext = determine_ext(video_url)
	220
	221	if ext == 'unknown_video':
	222	formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
	223	else:
	224	formats = [{
	225	'url': video_url,
	226	'ext': ext
	227	}]
	228
510809f1	229	title = description = self._og_search_description(full_webpage) or self._html_search_regex(
510809f1	230	r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)
bb8a73a0	231
510809f1	232	uploader_id = self._html_search_regex(
510809f1	233	r'<a class="username"[^>]+title="@([^"]+)"', webpage, 'uploader id', fatal=False) or uploader_id
bb8a73a0	234
510809f1	235	uploader = self._html_search_regex(
	236	r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
	237	if uploader:
	238	title = f'{uploader} - {title}'
bb8a73a0	239
510809f1	240	counts = {
	241	f'{x[0]}_count': self._html_search_regex(
	242	fr'<span[^>]+class="icon-{x[1]}[^>]></span>([^<])</div>',
	243	webpage, f'{x[0]} count', fatal=False)
	244	for x in (('view', 'play'), ('like', 'heart'), ('repost', 'retweet'), ('comment', 'comment'))
	245	}
	246	counts = {field: 0 if count == '' else parse_count(count) for field, count in counts.items()}
bb8a73a0	247
510809f1	248	thumbnail = (
	249	self._html_search_meta('og:image', full_webpage, 'thumbnail url')
	250	or remove_end('%s%s' % (base_url, self._html_search_regex(
	251	r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)), '%3Asmall'))
	252
	253	thumbnails = [
	254	{'id': id, 'url': f'{thumbnail}%3A{id}'}
	255	for id in ('thumb', 'small', 'large', 'medium', 'orig')
	256	]
	257
	258	date = self._html_search_regex(
	259	r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"',
	260	webpage, 'upload date', default='').replace('·', '')
bb8a73a0	261
	262	return {
	263	'id': video_id,
	264	'title': title,
	265	'description': description,
	266	'uploader': uploader,
510809f1	267	'timestamp': unified_timestamp(date),
bb8a73a0	268	'uploader_id': uploader_id,
510809f1	269	'uploader_url': f'{base_url}/{uploader_id}',
bb8a73a0	270	'formats': formats,
	271	'thumbnails': thumbnails,
	272	'thumbnail': thumbnail,
510809f1	273	**counts,
bb8a73a0	274	}