jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	determine_ext,
	9	int_or_none,
	10	parse_duration,
	11	remove_end,
	12	)
	13
	14
	15	class LRTIE(InfoExtractor):
	16	IE_NAME = 'lrt.lt'
	17	_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
	18	_TESTS = [{
	19	# m3u8 download
	20	'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
	21	'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
	22	'info_dict': {
	23	'id': '54391',
	24	'ext': 'mp4',
	25	'title': 'Septynios Kauno dienos',
	26	'description': 'md5:24d84534c7dc76581e59f5689462411a',
	27	'duration': 1783,
	28	'view_count': int,
	29	'like_count': int,
	30	},
	31	}, {
	32	# direct mp3 download
	33	'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/',
	34	'md5': '389da8ca3cad0f51d12bed0c844f6a0a',
	35	'info_dict': {
	36	'id': '1013074524',
	37	'ext': 'mp3',
	38	'title': 'Kita tema 2016-09-05 15:05',
	39	'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
	40	'duration': 3008,
	41	'view_count': int,
	42	'like_count': int,
	43	},
	44	}]
	45
	46	def _real_extract(self, url):
	47	video_id = self._match_id(url)
	48	webpage = self._download_webpage(url, video_id)
	49
	50	title = remove_end(self._og_search_title(webpage), ' - LRT')
	51
	52	formats = []
	53	for _, file_url in re.findall(
	54	r'file\s:\s(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
	55	ext = determine_ext(file_url)
	56	if ext not in ('m3u8', 'mp3'):
	57	continue
	58	# mp3 served as m3u8 produces stuttered media file
	59	if ext == 'm3u8' and '.mp3' in file_url:
	60	continue
	61	if ext == 'm3u8':
	62	formats.extend(self._extract_m3u8_formats(
	63	file_url, video_id, 'mp4', entry_protocol='m3u8_native',
	64	fatal=False))
	65	elif ext == 'mp3':
	66	formats.append({
	67	'url': file_url,
	68	'vcodec': 'none',
	69	})
	70	self._sort_formats(formats)
	71
	72	thumbnail = self._og_search_thumbnail(webpage)
	73	description = self._og_search_description(webpage)
	74	duration = parse_duration(self._search_regex(
	75	r'var\s+record_len\s=\s(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
	76	webpage, 'duration', default=None, group='duration'))
	77
	78	view_count = int_or_none(self._html_search_regex(
	79	r'<div[^>]+class=(["\']).?record-desc-seen.?\1[^>]*>(?P<count>.+?)</div>',
	80	webpage, 'view count', fatal=False, group='count'))
	81	like_count = int_or_none(self._search_regex(
	82	r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
	83	webpage, 'like count', fatal=False, group='count'))
	84
	85	return {
	86	'id': video_id,
	87	'title': title,
	88	'formats': formats,
	89	'thumbnail': thumbnail,
	90	'description': description,
	91	'duration': duration,
	92	'view_count': view_count,
	93	'like_count': like_count,
	94	}