jfr.im git - yt-dlp.git/blame_incremental - yt

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	int_or_none,
	9	orderedSet,
	10	)
	11
	12
	13	class WebOfStoriesIE(InfoExtractor):
	14	_VALID_URL = r'https?://(?:www\.)?webofstories\.com/play/(?:[^/]+/)?(?P<id>[0-9]+)'
	15	_VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
	16	_GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
	17	_USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
	18	_TESTS = [{
	19	'url': 'http://www.webofstories.com/play/hans.bethe/71',
	20	'md5': '373e4dd915f60cfe3116322642ddf364',
	21	'info_dict': {
	22	'id': '4536',
	23	'ext': 'mp4',
	24	'title': 'The temperature of the sun',
	25	'thumbnail': r're:^https?://.*\.jpg$',
	26	'description': 'Hans Bethe talks about calculating the temperature of the sun',
	27	'duration': 238,
	28	}
	29	}, {
	30	'url': 'http://www.webofstories.com/play/55908',
	31	'md5': '2985a698e1fe3211022422c4b5ed962c',
	32	'info_dict': {
	33	'id': '55908',
	34	'ext': 'mp4',
	35	'title': 'The story of Gemmata obscuriglobus',
	36	'thumbnail': r're:^https?://.*\.jpg$',
	37	'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
	38	'duration': 169,
	39	},
	40	'skip': 'notfound',
	41	}, {
	42	# malformed og:title meta
	43	'url': 'http://www.webofstories.com/play/54215?o=MS',
	44	'info_dict': {
	45	'id': '54215',
	46	'ext': 'mp4',
	47	'title': '"A Leg to Stand On"',
	48	'thumbnail': r're:^https?://.*\.jpg$',
	49	'description': 'Oliver Sacks talks about the death and resurrection of a limb',
	50	'duration': 97,
	51	},
	52	'params': {
	53	'skip_download': True,
	54	},
	55	}]
	56
	57	def _real_extract(self, url):
	58	video_id = self._match_id(url)
	59
	60	webpage = self._download_webpage(url, video_id)
	61	# Sometimes og:title meta is malformed
	62	title = self._og_search_title(webpage, default=None) or self._html_search_regex(
	63	r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
	64	description = self._html_search_meta('description', webpage)
	65	thumbnail = self._og_search_thumbnail(webpage)
	66
	67	embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
	68	r'(?s)\$$"#embedCode"$.html$getEmbedCode\((.*?)$',
	69	webpage, 'embed params').split(',')]
	70
	71	(
	72	_, speaker_id, story_id, story_duration,
	73	speaker_type, great_life, _thumbnail, _has_subtitles,
	74	story_filename, _story_order) = embed_params
	75
	76	is_great_life_series = great_life == 'true'
	77	duration = int_or_none(story_duration)
	78
	79	# URL building, see: http://www.webofstories.com/scripts/player.js
	80	ms_prefix = ''
	81	if speaker_type.lower() == 'ms':
	82	ms_prefix = 'mini_sites/'
	83
	84	if is_great_life_series:
	85	mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format(
	86	self._VIDEO_DOMAIN, speaker_id, story_filename)
	87	rtmp_ext = 'flv'
	88	streamer = self._GREAT_LIFE_STREAMER
	89	play_path = 'stories/{0:}/{1:}'.format(
	90	speaker_id, story_filename)
	91	else:
	92	mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format(
	93	self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename)
	94	rtmp_ext = 'mp4'
	95	streamer = self._USER_STREAMER
	96	play_path = 'mp4:{0:}{1:}/{2}.mp4'.format(
	97	ms_prefix, speaker_id, story_filename)
	98
	99	formats = [{
	100	'format_id': 'mp4_sd',
	101	'url': mp4_url,
	102	}, {
	103	'format_id': 'rtmp_sd',
	104	'page_url': url,
	105	'url': streamer,
	106	'ext': rtmp_ext,
	107	'play_path': play_path,
	108	}]
	109
	110	self._sort_formats(formats)
	111
	112	return {
	113	'id': story_id,
	114	'title': title,
	115	'formats': formats,
	116	'thumbnail': thumbnail,
	117	'description': description,
	118	'duration': duration,
	119	}
	120
	121
	122	class WebOfStoriesPlaylistIE(InfoExtractor):
	123	_VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
	124	_TEST = {
	125	'url': 'http://www.webofstories.com/playAll/donald.knuth',
	126	'info_dict': {
	127	'id': 'donald.knuth',
	128	'title': 'Donald Knuth (Scientist)',
	129	},
	130	'playlist_mincount': 97,
	131	}
	132
	133	def _real_extract(self, url):
	134	playlist_id = self._match_id(url)
	135
	136	webpage = self._download_webpage(url, playlist_id)
	137
	138	entries = [
	139	self.url_result(
	140	'http://www.webofstories.com/play/%s' % video_id,
	141	'WebOfStories', video_id=video_id)
	142	for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
	143	]
	144
	145	title = self._search_regex(
	146	r'<div id="speakerName">\s*<span>([^<]+)</span>',
	147	webpage, 'speaker', default=None)
	148	if title:
	149	field = self._search_regex(
	150	r'<span id="primaryField">([^<]+)</span>',
	151	webpage, 'field', default=None)
	152	if field:
	153	title += ' (%s)' % field
	154
	155	if not title:
	156	title = self._search_regex(
	157	r'<title>Play\s+all\s+stories\s-\s([^<]+)\s-\sWeb\s+of\s+Stories</title>',
	158	webpage, 'title')
	159
	160	return self.playlist_result(entries, playlist_id, title)