jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import compat_str
	8	from ..utils import (
	9	int_or_none,
	10	js_to_json,
	11	strip_or_none,
	12	try_get,
	13	unified_timestamp,
	14	)
	15
	16
	17	class WatchBoxIE(InfoExtractor):
	18	_VALID_URL = r'https?://(?:www\.)?watchbox\.de/(?P<kind>serien\|filme)/(?:[^/]+/)*[^/]+-(?P<id>\d+)'
	19	_TESTS = [{
	20	# film
	21	'url': 'https://www.watchbox.de/filme/free-jimmy-12325.html',
	22	'info_dict': {
	23	'id': '341368',
	24	'ext': 'mp4',
	25	'title': 'Free Jimmy',
	26	'description': 'md5:bcd8bafbbf9dc0ef98063d344d7cc5f6',
	27	'thumbnail': r're:^https?://.*\.jpg$',
	28	'duration': 4890,
	29	'age_limit': 16,
	30	'release_year': 2009,
	31	},
	32	'params': {
	33	'format': 'bestvideo',
	34	'skip_download': True,
	35	},
	36	'expected_warnings': ['Failed to download m3u8 information'],
	37	}, {
	38	# episode
	39	'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-1/date-in-der-hoelle-328286.html',
	40	'info_dict': {
	41	'id': '328286',
	42	'ext': 'mp4',
	43	'title': 'S01 E01 - Date in der Hölle',
	44	'description': 'md5:2f31c74a8186899f33cb5114491dae2b',
	45	'thumbnail': r're:^https?://.*\.jpg$',
	46	'duration': 1291,
	47	'age_limit': 12,
	48	'release_year': 2010,
	49	'series': 'Ugly Americans',
	50	'season_number': 1,
	51	'episode': 'Date in der Hölle',
	52	'episode_number': 1,
	53	},
	54	'params': {
	55	'format': 'bestvideo',
	56	'skip_download': True,
	57	},
	58	'expected_warnings': ['Failed to download m3u8 information'],
	59	}, {
	60	'url': 'https://www.watchbox.de/serien/ugly-americans-12231/staffel-2/der-ring-des-powers-328270',
	61	'only_matching': True,
	62	}]
	63
	64	def _real_extract(self, url):
	65	mobj = re.match(self._VALID_URL, url)
	66	kind, video_id = mobj.group('kind', 'id')
	67
	68	webpage = self._download_webpage(url, video_id)
	69
	70	source = self._parse_json(
	71	self._search_regex(
	72	r'(?s)source["\']?\s:\s({.+?})\s*[,}]', webpage, 'source',
	73	default='{}'),
	74	video_id, transform_source=js_to_json, fatal=False) or {}
	75
	76	video_id = compat_str(source.get('videoId') or video_id)
	77
	78	devapi = self._download_json(
	79	'http://api.watchbox.de/devapi/id/%s' % video_id, video_id, query={
	80	'format': 'json',
	81	'apikey': 'hbbtv',
	82	}, fatal=False)
	83
	84	item = try_get(devapi, lambda x: x['items'][0], dict) or {}
	85
	86	title = item.get('title') or try_get(
	87	item, lambda x: x['movie']['headline_movie'],
	88	compat_str) or source['title']
	89
	90	formats = []
	91	hls_url = item.get('media_videourl_hls') or source.get('hls')
	92	if hls_url:
	93	formats.extend(self._extract_m3u8_formats(
	94	hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
	95	m3u8_id='hls', fatal=False))
	96	dash_url = item.get('media_videourl_wv') or source.get('dash')
	97	if dash_url:
	98	formats.extend(self._extract_mpd_formats(
	99	dash_url, video_id, mpd_id='dash', fatal=False))
	100	mp4_url = item.get('media_videourl')
	101	if mp4_url:
	102	formats.append({
	103	'url': mp4_url,
	104	'format_id': 'mp4',
	105	'width': int_or_none(item.get('width')),
	106	'height': int_or_none(item.get('height')),
	107	'tbr': int_or_none(item.get('bitrate')),
	108	})
	109	self._sort_formats(formats)
	110
	111	description = strip_or_none(item.get('descr'))
	112	thumbnail = item.get('media_content_thumbnail_large') or source.get('poster') or item.get('media_thumbnail')
	113	duration = int_or_none(item.get('media_length') or source.get('length'))
	114	timestamp = unified_timestamp(item.get('pubDate'))
	115	view_count = int_or_none(item.get('media_views'))
	116	age_limit = int_or_none(try_get(item, lambda x: x['movie']['fsk']))
	117	release_year = int_or_none(try_get(item, lambda x: x['movie']['rel_year']))
	118
	119	info = {
	120	'id': video_id,
	121	'title': title,
	122	'description': description,
	123	'thumbnail': thumbnail,
	124	'duration': duration,
	125	'timestamp': timestamp,
	126	'view_count': view_count,
	127	'age_limit': age_limit,
	128	'release_year': release_year,
	129	'formats': formats,
	130	}
	131
	132	if kind.lower() == 'serien':
	133	series = try_get(
	134	item, lambda x: x['special']['title'],
	135	compat_str) or source.get('format')
	136	season_number = int_or_none(self._search_regex(
	137	r'^S(\d{1,2})\s*E\d{1,2}', title, 'season number',
	138	default=None) or self._search_regex(
	139	r'/staffel-(\d+)/', url, 'season number', default=None))
	140	episode = source.get('title')
	141	episode_number = int_or_none(self._search_regex(
	142	r'^S\d{1,2}\s*E(\d{1,2})', title, 'episode number',
	143	default=None))
	144	info.update({
	145	'series': series,
	146	'season_number': season_number,
	147	'episode': episode,
	148	'episode_number': episode_number,
	149	})
	150
	151	return info