jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import json
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	js_to_json,
	9	qualities,
	10	)
	11
	12
	13	class TassIE(InfoExtractor):
	14	_VALID_URL = r'https?://(?:tass\.ru\|itar-tass\.com)/[^/]+/(?P<id>\d+)'
	15	_TESTS = [
	16	{
	17	'url': 'http://tass.ru/obschestvo/1586870',
	18	'md5': '3b4cdd011bc59174596b6145cda474a4',
	19	'info_dict': {
	20	'id': '1586870',
	21	'ext': 'mp4',
	22	'title': 'Посетителям московского зоопарка показали красную панду',
	23	'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
	24	'thumbnail': r're:^https?://.*\.jpg$',
	25	},
	26	},
	27	{
	28	'url': 'http://itar-tass.com/obschestvo/1600009',
	29	'only_matching': True,
	30	},
	31	]
	32
	33	def _real_extract(self, url):
	34	video_id = self._match_id(url)
	35
	36	webpage = self._download_webpage(url, video_id)
	37
	38	sources = json.loads(js_to_json(self._search_regex(
	39	r'(?s)sources\s:\s(\[.+?\])', webpage, 'sources')))
	40
	41	quality = qualities(['sd', 'hd'])
	42
	43	formats = []
	44	for source in sources:
	45	video_url = source.get('file')
	46	if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
	47	continue
	48	label = source.get('label')
	49	formats.append({
	50	'url': video_url,
	51	'format_id': label,
	52	'quality': quality(label),
	53	})
	54	self._sort_formats(formats)
	55
	56	return {
	57	'id': video_id,
	58	'title': self._og_search_title(webpage),
	59	'description': self._og_search_description(webpage),
	60	'thumbnail': self._og_search_thumbnail(webpage),
	61	'formats': formats,
	62	}