[yt-dlp.git] / yt_dlp / extractor / tass.py

import json

from .common import InfoExtractor
from ..utils import (
    js_to_json,
    qualities,
)


class TassIE(InfoExtractor):
    # Matches pages on tass.ru and itar-tass.com whose path is
    # /<section>/<digits>; the digits are captured as the video id.
    _VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://tass.ru/obschestvo/1586870',
        'md5': '3b4cdd011bc59174596b6145cda474a4',
        'info_dict': {
            'id': '1586870',
            'ext': 'mp4',
            'title': 'Посетителям московского зоопарка показали красную панду',
            'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'http://itar-tass.com/obschestvo/1600009',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one matched page.

        The page is downloaded, the first ``sources: [...]`` array found in
        it is converted with ``js_to_json`` and parsed, and every entry
        whose ``file`` value starts with ``http`` and ends with ``.mp4``
        becomes a format. Title, description and thumbnail are taken from
        the ``_og_search_*`` helpers.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The captured array is run through js_to_json before json.loads
        # (presumably because the page embeds it as a JavaScript literal).
        sources_js = self._search_regex(
            r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')
        source_entries = json.loads(js_to_json(sources_js))

        # NOTE(review): presumably ranks 'hd' above 'sd' - confirm against
        # the `qualities` helper.
        rank_label = qualities(['sd', 'hd'])

        formats = []
        for entry in source_entries:
            media_url = entry.get('file')
            # Keep only entries that carry an http* URL ending in .mp4.
            if media_url and media_url.startswith('http') and media_url.endswith('.mp4'):
                fmt_label = entry.get('label')
                formats.append({
                    'url': media_url,
                    'format_id': fmt_label,
                    'quality': rank_label(fmt_label),
                })

        # Metadata lookups stay in the original order: title, description,
        # thumbnail.
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
Commit	Line	Data
b9ed3af3 S	1	import json
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	js_to_json,
	6	qualities,
	7	)
	8
	9
	10	class TassIE(InfoExtractor):
	11	_VALID_URL = r'https?://(?:tass\.ru|itar-tass\.com)/[^/]+/(?P<id>\d+)'
	12	_TESTS = [
	13	{
	14	'url': 'http://tass.ru/obschestvo/1586870',
	15	'md5': '3b4cdd011bc59174596b6145cda474a4',
	16	'info_dict': {
	17	'id': '1586870',
	18	'ext': 'mp4',
	19	'title': 'Посетителям московского зоопарка показали красную панду',
	20	'description': 'Приехавшую из Дублина Зейну можно увидеть в павильоне "Кошки тропиков"',
ec85ded8	21	'thumbnail': r're:^https?://.*\.jpg$',
b9ed3af3 S	22	},
	23	},
	24	{
	25	'url': 'http://itar-tass.com/obschestvo/1600009',
	26	'only_matching': True,
	27	},
	28	]
	29
	30	def _real_extract(self, url):
	31	video_id = self._match_id(url)
	32
	33	webpage = self._download_webpage(url, video_id)
	34
	35	sources = json.loads(js_to_json(self._search_regex(
	36	r'(?s)sources\s*:\s*(\[.+?\])', webpage, 'sources')))
	37
	38	quality = qualities(['sd', 'hd'])
	39
	40	formats = []
	41	for source in sources:
	42	video_url = source.get('file')
	43	if not video_url or not video_url.startswith('http') or not video_url.endswith('.mp4'):
	44	continue
	45	label = source.get('label')
	46	formats.append({
	47	'url': video_url,
	48	'format_id': label,
	49	'quality': quality(label),
	50	})
b9ed3af3 S	51
	52	return {
	53	'id': video_id,
	54	'title': self._og_search_title(webpage),
	55	'description': self._og_search_description(webpage),
	56	'thumbnail': self._og_search_thumbnail(webpage),
	57	'formats': formats,
	58	}