[yt-dlp.git] / yt_dlp / extractor / nitter.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
    parse_count,
    unified_timestamp,
    remove_end,
    determine_ext,
)
import re
import random


class NitterIE(InfoExtractor):
    """Extract the video attached to a tweet page served by a Nitter instance.

    URLs are matched against a fixed list of known instance host names
    (see INSTANCES) followed by ``/<uploader_id>/status/<id>``.
    """

    # Taken from https://github.com/zedeus/nitter/wiki/Instances

    # Instances reachable only through .onion / .i2p host names.
    NON_HTTP_INSTANCES = (
        '3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
        'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
        'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
        'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
        'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
        'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
        '26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
        'vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad.onion',
        'iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd.onion',
        'erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad.onion',
        'ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd.onion',
        'jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid.onion',
        'nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad.onion',
        'nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd.onion',
        'nitter.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion',
        'ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad.onion',
        'ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd.onion',

        'nitter.i2p',
        'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',

        'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
    )

    # Regular host names; the test URLs below pick one of these at random.
    HTTP_INSTANCES = (
        'nitter.42l.fr',
        'nitter.pussthecat.org',
        'nitter.nixnet.services',
        'nitter.fdn.fr',
        'nitter.1d4.us',
        'nitter.kavin.rocks',
        'nitter.unixfox.eu',
        'nitter.domain.glass',
        'nitter.eu',
        'nitter.namazso.eu',
        'nitter.actionsack.com',
        'birdsite.xanny.family',
        'nitter.hu',
        'twitr.gq',
        'nitter.moomoo.me',
        'nittereu.moomoo.me',
        'bird.from.tf',
        'nitter.it',
        'twitter.censors.us',
        'twitter.grimneko.de',
        'nitter.alefvanoon.xyz',
        'n.hyperborea.cloud',
        'nitter.ca',
        'twitter.076.ne.jp',
        'twitter.mstdn.social',
        'nitter.fly.dev',
        'notabird.site',
        'nitter.weiler.rocks',
        'nitter.silkky.cloud',
        'nitter.sethforprivacy.com',
        'nttr.stream',
        'nitter.cutelab.space',
        'nitter.nl',
        'nitter.mint.lgbt',
        'nitter.bus-hit.me',
        'fuckthesacklers.network',
        'nitter.govt.land',
        'nitter.datatunnel.xyz',
        'nitter.esmailelbob.xyz',
        'tw.artemislena.eu',
        'de.nttr.stream',
        'nitter.winscloud.net',
        'nitter.tiekoetter.com',
        'nitter.spaceint.fr',
        'twtr.bch.bar',
        'nitter.exonip.de',
        'nitter.mastodon.pro',
        'nitter.notraxx.ch',

        # not in the list anymore
        'nitter.skrep.in',
        'nitter.snopyta.org',
    )

    # Still matched by _VALID_URL, but never chosen for the tests.
    DEAD_INSTANCES = (
        # maintenance
        'nitter.ethibox.fr',

        # official, rate limited
        'nitter.net',
        # offline
        'is-nitter.resolv.ee',
        'lu-nitter.resolv.ee',
        'nitter.13ad.de',
        'nitter.40two.app',
        'nitter.cattube.org',
        'nitter.cc',
        'nitter.dark.fail',
        'nitter.himiko.cloud',
        'nitter.koyu.space',
        'nitter.mailstation.de',
        'nitter.mastodont.cat',
        'nitter.tedomum.net',
        'nitter.tokhmi.xyz',
        'nitter.weaponizedhumiliation.com',
        'nitter.vxempire.xyz',
        'tweet.lambda.dance',
    )

    INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES

    # Alternation of every known host name, regex-escaped.
    _INSTANCES_RE = f'(?:{"|".join(map(re.escape, INSTANCES))})'
    _VALID_URL = fr'https?://{_INSTANCES_RE}/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?'
    # Picked once, when the class body is executed; only used to build _TESTS.
    current_instance = random.choice(HTTP_INSTANCES)

    _TESTS = [
        {
            # GIF (wrapped in mp4)
            'url': f'https://{current_instance}/firefox/status/1314279897502629888#m',
            'info_dict': {
                'id': '1314279897502629888',
                'ext': 'mp4',
                'title': 'md5:7890a9277da4639ab624dd899424c5d8',
                'description': 'md5:5fea96a4d3716c350f8b95b21b3111fe',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Firefox 🔥',
                'uploader_id': 'firefox',
                'uploader_url': f'https://{current_instance}/firefox',
                'upload_date': '20201008',
                'timestamp': 1602183720,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            },
        }, {  # normal video
            'url': f'https://{current_instance}/Le___Doc/status/1299715685392756737#m',
            'info_dict': {
                'id': '1299715685392756737',
                'ext': 'mp4',
                'title': 're:^.* - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
                'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 're:^Le *Doc',
                'uploader_id': 'Le___Doc',
                'uploader_url': f'https://{current_instance}/Le___Doc',
                'upload_date': '20200829',
                'timestamp': 1598711340,
                'view_count': int,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            },
        }, {  # video embed in a "Streaming Political Ads" box
            'url': f'https://{current_instance}/mozilla/status/1321147074491092994#m',
            'info_dict': {
                'id': '1321147074491092994',
                'ext': 'mp4',
                'title': 'md5:8290664aabb43b9189145c008386bf12',
                'description': 'md5:9cf2762d49674bc416a191a689fb2aaa',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Mozilla',
                'uploader_id': 'mozilla',
                'uploader_url': f'https://{current_instance}/mozilla',
                'upload_date': '20201027',
                'timestamp': 1603820940,
                'view_count': int,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            },
            'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
        }, {  # not the first tweet but main-tweet
            'url': f'https://{current_instance}/firefox/status/1354848277481414657#m',
            'info_dict': {
                'id': '1354848277481414657',
                'ext': 'mp4',
                'title': 'md5:bef647f03bd1c6b15b687ea70dfc9700',
                'description': 'md5:5efba25e2f9dac85ebcd21160cb4341f',
                'thumbnail': r're:^https?://.*\.jpg$',
                'uploader': 'Firefox 🔥',
                'uploader_id': 'firefox',
                'uploader_url': f'https://{current_instance}/firefox',
                'upload_date': '20210128',
                'timestamp': 1611855960,
                'view_count': int,
                'like_count': int,
                'repost_count': int,
                'comment_count': int,
            }
        }
    ]

    def _real_extract(self, url):
        """Download the tweet page at *url* and build its info dict.

        The returned dict carries the id, title, description, uploader
        fields, timestamp, formats, thumbnail(s) and the view / like /
        repost / comment counts scraped from the page. The video URL lookup
        uses the default (fatal) mode of ``_html_search_regex``; every other
        field is looked up non-fatally and may end up as ``None``.
        """
        video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
        parsed_url = compat_urlparse.urlparse(url)
        base_url = f'{parsed_url.scheme}://{parsed_url.netloc}'

        # NOTE(review): presumably makes the instance expose HLS playback URLs
        # in the page markup -- confirm against Nitter's preferences handling.
        self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
        full_webpage = webpage = self._download_webpage(url, video_id)

        # Restrict tweet-level scraping to the main tweet so that earlier
        # tweets of the same thread are ignored. str.find() signals "not
        # found" with -1, so compare against that sentinel explicitly.
        main_tweet_start = full_webpage.find('class="main-tweet"')
        if main_tweet_start != -1:
            webpage = full_webpage[main_tweet_start:]

        video_path = self._html_search_regex(
            r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
        video_url = f'{base_url}{video_path}'
        ext = determine_ext(video_url)

        # NOTE(review): 'unknown_video' looks like determine_ext's "no
        # extension recognised" result; such URLs are handled as HLS manifests.
        if ext == 'unknown_video':
            formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
        else:
            formats = [{
                'url': video_url,
                'ext': ext
            }]

        description = self._og_search_description(full_webpage) or self._html_search_regex(
            r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)

        # Prefer the handle shown on the page; fall back to the one in the URL.
        uploader_id = self._html_search_regex(
            r'<a class="username"[^>]+title="@([^"]+)"', webpage, 'uploader id', fatal=False) or uploader_id

        uploader = self._html_search_regex(
            r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)

        # Title is "<uploader> - <description>"; when the description is
        # missing use the uploader alone instead of emitting "... - None".
        title = description
        if uploader:
            title = f'{uploader} - {description}' if description else uploader

        # (info-dict field prefix, icon class suffix used in the page markup)
        count_icons = (('view', 'play'), ('like', 'heart'), ('repost', 'retweet'), ('comment', 'comment'))
        counts = {}
        for field, icon in count_icons:
            raw_count = self._html_search_regex(
                fr'<span[^>]+class="icon-{icon}[^>]*></span>([^<]*)</div>',
                webpage, f'{field} count', fatal=False)
            # An icon followed by no text is reported as zero.
            counts[f'{field}_count'] = 0 if raw_count == '' else parse_count(raw_count)

        thumbnail = self._html_search_meta('og:image', full_webpage, 'thumbnail url')
        if not thumbnail:
            poster = self._html_search_regex(
                r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)
            # Only build a URL when a poster was actually found; formatting
            # None into the string would yield a bogus "<base_url>None".
            thumbnail = remove_end(f'{base_url}{poster}', '%3Asmall') if poster else None

        # Size variants are addressed by appending an URL-encoded ":<size>".
        thumbnails = [
            {'id': size, 'url': f'{thumbnail}%3A{size}'}
            for size in ('thumb', 'small', 'large', 'medium', 'orig')
        ] if thumbnail else []

        date = self._html_search_regex(
            r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"',
            webpage, 'upload date', default='').replace('·', '')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'timestamp': unified_timestamp(date),
            'uploader_id': uploader_id,
            'uploader_url': f'{base_url}/{uploader_id}',
            'formats': formats,
            'thumbnails': thumbnails,
            'thumbnail': thumbnail,
            **counts,
        }
Commit	Line	Data
bb8a73a0	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	from .common import InfoExtractor
	5	from ..compat import compat_urlparse
	6	from ..utils import (
	7	parse_count,
bb8a73a0	8	unified_timestamp,
	9	remove_end,
	10	determine_ext,
	11	)
	12	import re
a4ddaf23	13	import random
bb8a73a0	14
	15
	16	class NitterIE(InfoExtractor):
	17	# Taken from https://github.com/zedeus/nitter/wiki/Instances
a4ddaf23	18
	19	NON_HTTP_INSTANCES = (
	20	'3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad.onion',
	21	'nitter.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd.onion',
	22	'nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd.onion',
	23	'npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid.onion',
	24	'nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion',
	25	'i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion',
	26	'26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion',
510809f1	27	'vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad.onion',
	28	'iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd.onion',
	29	'erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad.onion',
	30	'ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd.onion',
	31	'jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid.onion',
	32	'nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad.onion',
	33	'nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd.onion',
	34	'nitter.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion',
	35	'ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad.onion',
	36	'ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd.onion',
a4ddaf23	37
	38	'nitter.i2p',
	39	'u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p',
	40
	41	'nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd.onion',
	42	)
	43
	44	HTTP_INSTANCES = (
	45	'nitter.42l.fr',
	46	'nitter.pussthecat.org',
	47	'nitter.nixnet.services',
a4ddaf23	48	'nitter.fdn.fr',
	49	'nitter.1d4.us',
	50	'nitter.kavin.rocks',
a4ddaf23	51	'nitter.unixfox.eu',
a4ddaf23	52	'nitter.domain.glass',
a4ddaf23	53	'nitter.eu',
a4ddaf23	54	'nitter.namazso.eu',
a4ddaf23	55	'nitter.actionsack.com',
a4ddaf23	56	'birdsite.xanny.family',
510809f1	57	'nitter.hu',
	58	'twitr.gq',
	59	'nitter.moomoo.me',
	60	'nittereu.moomoo.me',
	61	'bird.from.tf',
	62	'nitter.it',
	63	'twitter.censors.us',
	64	'twitter.grimneko.de',
	65	'nitter.alefvanoon.xyz',
	66	'n.hyperborea.cloud',
	67	'nitter.ca',
	68	'twitter.076.ne.jp',
	69	'twitter.mstdn.social',
	70	'nitter.fly.dev',
	71	'notabird.site',
	72	'nitter.weiler.rocks',
	73	'nitter.silkky.cloud',
	74	'nitter.sethforprivacy.com',
	75	'nttr.stream',
	76	'nitter.cutelab.space',
	77	'nitter.nl',
	78	'nitter.mint.lgbt',
	79	'nitter.bus-hit.me',
	80	'fuckthesacklers.network',
	81	'nitter.govt.land',
	82	'nitter.datatunnel.xyz',
	83	'nitter.esmailelbob.xyz',
	84	'tw.artemislena.eu',
	85	'de.nttr.stream',
	86	'nitter.winscloud.net',
	87	'nitter.tiekoetter.com',
	88	'nitter.spaceint.fr',
	89	'twtr.bch.bar',
	90	'nitter.exonip.de',
	91	'nitter.mastodon.pro',
	92	'nitter.notraxx.ch',
	93
a4ddaf23	94
a4ddaf23	95	# not in the list anymore
510809f1	96	'nitter.skrep.in',
a4ddaf23	97	'nitter.snopyta.org',
	98	)
	99
	100	DEAD_INSTANCES = (
	101	# maintenance
	102	'nitter.ethibox.fr',
	103
	104	# official, rate limited
	105	'nitter.net',
	106	# offline
510809f1	107	'is-nitter.resolv.ee',
510809f1	108	'lu-nitter.resolv.ee',
a4ddaf23	109	'nitter.13ad.de',
510809f1	110	'nitter.40two.app',
	111	'nitter.cattube.org',
	112	'nitter.cc',
	113	'nitter.dark.fail',
	114	'nitter.himiko.cloud',
	115	'nitter.koyu.space',
	116	'nitter.mailstation.de',
	117	'nitter.mastodont.cat',
	118	'nitter.tedomum.net',
	119	'nitter.tokhmi.xyz',
a4ddaf23	120	'nitter.weaponizedhumiliation.com',
510809f1	121	'nitter.vxempire.xyz',
510809f1	122	'tweet.lambda.dance',
a4ddaf23	123	)
	124
	125	INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
bb8a73a0	126
510809f1	127	_INSTANCES_RE = f'(?:{"|".join(map(re.escape, INSTANCES))})'
510809f1	128	_VALID_URL = fr'https?://{_INSTANCES_RE}/(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)?'
a4ddaf23	129	current_instance = random.choice(HTTP_INSTANCES)
a4ddaf23	130
bb8a73a0	131	_TESTS = [
	132	{
	133	# GIF (wrapped in mp4)
510809f1	134	'url': f'https://{current_instance}/firefox/status/1314279897502629888#m',
bb8a73a0	135	'info_dict': {
	136	'id': '1314279897502629888',
	137	'ext': 'mp4',
510809f1	138	'title': 'md5:7890a9277da4639ab624dd899424c5d8',
510809f1	139	'description': 'md5:5fea96a4d3716c350f8b95b21b3111fe',
bb8a73a0	140	'thumbnail': r're:^https?://.*\.jpg$',
	141	'uploader': 'Firefox 🔥',
	142	'uploader_id': 'firefox',
510809f1	143	'uploader_url': f'https://{current_instance}/firefox',
bb8a73a0	144	'upload_date': '20201008',
bb8a73a0	145	'timestamp': 1602183720,
510809f1	146	'like_count': int,
	147	'repost_count': int,
	148	'comment_count': int,
bb8a73a0	149	},
bb8a73a0	150	}, { # normal video
510809f1	151	'url': f'https://{current_instance}/Le___Doc/status/1299715685392756737#m',
bb8a73a0	152	'info_dict': {
	153	'id': '1299715685392756737',
	154	'ext': 'mp4',
510809f1	155	'title': 're:^.* - "Je ne prédis jamais rien"\nD Raoult, Août 2020...',
a4ddaf23	156	'description': '"Je ne prédis jamais rien"\nD Raoult, Août 2020...',
bb8a73a0	157	'thumbnail': r're:^https?://.*\.jpg$',
510809f1	158	'uploader': 're:^Le *Doc',
bb8a73a0	159	'uploader_id': 'Le___Doc',
510809f1	160	'uploader_url': f'https://{current_instance}/Le___Doc',
bb8a73a0	161	'upload_date': '20200829',
510809f1	162	'timestamp': 1598711340,
bb8a73a0	163	'view_count': int,
	164	'like_count': int,
	165	'repost_count': int,
	166	'comment_count': int,
	167	},
	168	}, { # video embed in a "Streaming Political Ads" box
510809f1	169	'url': f'https://{current_instance}/mozilla/status/1321147074491092994#m',
bb8a73a0	170	'info_dict': {
	171	'id': '1321147074491092994',
	172	'ext': 'mp4',
510809f1	173	'title': 'md5:8290664aabb43b9189145c008386bf12',
510809f1	174	'description': 'md5:9cf2762d49674bc416a191a689fb2aaa',
bb8a73a0	175	'thumbnail': r're:^https?://.*\.jpg$',
	176	'uploader': 'Mozilla',
	177	'uploader_id': 'mozilla',
510809f1	178	'uploader_url': f'https://{current_instance}/mozilla',
bb8a73a0	179	'upload_date': '20201027',
510809f1	180	'timestamp': 1603820940,
	181	'view_count': int,
	182	'like_count': int,
	183	'repost_count': int,
	184	'comment_count': int,
bb8a73a0	185	},
510809f1	186	'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'],
a4ddaf23	187	}, { # not the first tweet but main-tweet
510809f1	188	'url': f'https://{current_instance}/firefox/status/1354848277481414657#m',
a4ddaf23	189	'info_dict': {
510809f1	190	'id': '1354848277481414657',
a4ddaf23	191	'ext': 'mp4',
510809f1	192	'title': 'md5:bef647f03bd1c6b15b687ea70dfc9700',
510809f1	193	'description': 'md5:5efba25e2f9dac85ebcd21160cb4341f',
a4ddaf23	194	'thumbnail': r're:^https?://.*\.jpg$',
510809f1	195	'uploader': 'Firefox 🔥',
	196	'uploader_id': 'firefox',
	197	'uploader_url': f'https://{current_instance}/firefox',
	198	'upload_date': '20210128',
	199	'timestamp': 1611855960,
	200	'view_count': int,
	201	'like_count': int,
	202	'repost_count': int,
	203	'comment_count': int,
a4ddaf23	204	}
a4ddaf23	205	}
bb8a73a0	206	]
	207
	208	def _real_extract(self, url):
510809f1	209	video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
bb8a73a0	210	parsed_url = compat_urlparse.urlparse(url)
510809f1	211	base_url = f'{parsed_url.scheme}://{parsed_url.netloc}'
bb8a73a0	212
bb8a73a0	213	self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on')
510809f1	214	full_webpage = webpage = self._download_webpage(url, video_id)
a4ddaf23	215
	216	main_tweet_start = full_webpage.find('class="main-tweet"')
	217	if main_tweet_start > 0:
	218	webpage = full_webpage[main_tweet_start:]
bb8a73a0	219
510809f1	220	video_url = '%s%s' % (base_url, self._html_search_regex(
510809f1	221	r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url'))
bb8a73a0	222	ext = determine_ext(video_url)
	223
	224	if ext == 'unknown_video':
	225	formats = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
	226	else:
	227	formats = [{
	228	'url': video_url,
	229	'ext': ext
	230	}]
	231
510809f1	232	title = description = self._og_search_description(full_webpage) or self._html_search_regex(
510809f1	233	r'<div class="tweet-content[^>]+>([^<]+)</div>', webpage, 'title', fatal=False)
bb8a73a0	234
510809f1	235	uploader_id = self._html_search_regex(
510809f1	236	r'<a class="username"[^>]+title="@([^"]+)"', webpage, 'uploader id', fatal=False) or uploader_id
bb8a73a0	237
510809f1	238	uploader = self._html_search_regex(
	239	r'<a class="fullname"[^>]+title="([^"]+)"', webpage, 'uploader name', fatal=False)
	240	if uploader:
	241	title = f'{uploader} - {title}'
bb8a73a0	242
510809f1	243	counts = {
	244	f'{x[0]}_count': self._html_search_regex(
	245	fr'<span[^>]+class="icon-{x[1]}[^>]*></span>([^<]*)</div>',
	246	webpage, f'{x[0]} count', fatal=False)
	247	for x in (('view', 'play'), ('like', 'heart'), ('repost', 'retweet'), ('comment', 'comment'))
	248	}
	249	counts = {field: 0 if count == '' else parse_count(count) for field, count in counts.items()}
bb8a73a0	250
510809f1	251	thumbnail = (
	252	self._html_search_meta('og:image', full_webpage, 'thumbnail url')
	253	or remove_end('%s%s' % (base_url, self._html_search_regex(
	254	r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)), '%3Asmall'))
	255
	256	thumbnails = [
	257	{'id': id, 'url': f'{thumbnail}%3A{id}'}
	258	for id in ('thumb', 'small', 'large', 'medium', 'orig')
	259	]
	260
	261	date = self._html_search_regex(
	262	r'<span[^>]+class="tweet-date"[^>]*><a[^>]+title="([^"]+)"',
	263	webpage, 'upload date', default='').replace('·', '')
bb8a73a0	264
	265	return {
	266	'id': video_id,
	267	'title': title,
	268	'description': description,
	269	'uploader': uploader,
510809f1	270	'timestamp': unified_timestamp(date),
bb8a73a0	271	'uploader_id': uploader_id,
510809f1	272	'uploader_url': f'{base_url}/{uploader_id}',
bb8a73a0	273	'formats': formats,
	274	'thumbnails': thumbnails,
	275	'thumbnail': thumbnail,
510809f1	276	**counts,
bb8a73a0	277	}