jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import functools
	4	import os.path
	5	import re
	6
	7	from .common import InfoExtractor
	8	from ..compat import (
	9	compat_urllib_parse_urlencode,
	10	compat_urlparse,
	11	)
	12	from ..utils import (
	13	int_or_none,
	14	OnDemandPagedList,
	15	parse_duration,
	16	remove_start,
	17	xpath_text,
	18	xpath_attr,
	19	)
	20
	21
	22	class NBAIE(InfoExtractor):
	23	_VALID_URL = r'https?://(?:watch\.\|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]?))/?(?:/index\.html)?(?:\?.)?$'
	24	_TESTS = [{
	25	'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
	26	'md5': '9e7729d3010a9c71506fd1248f74e4f4',
	27	'info_dict': {
	28	'id': '0021200253-okc-bkn-recap',
	29	'ext': 'mp4',
	30	'title': 'Thunder vs. Nets',
	31	'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
	32	'duration': 181,
	33	'timestamp': 1354638466,
	34	'upload_date': '20121204',
	35	},
	36	'params': {
	37	# m3u8 download
	38	'skip_download': True,
	39	},
	40	}, {
	41	'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
	42	'only_matching': True,
	43	}, {
	44	'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
	45	'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
	46	'info_dict': {
	47	'id': '0041400301-cle-atl-recap',
	48	'ext': 'mp4',
	49	'title': 'Hawks vs. Cavaliers Game 1',
	50	'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
	51	'duration': 228,
	52	'timestamp': 1432134543,
	53	'upload_date': '20150520',
	54	}
	55	}, {
	56	'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
	57	'info_dict': {
	58	'id': '1455672027478-Doc_Feb16_720',
	59	'ext': 'mp4',
	60	'title': 'Practice: Doc Rivers - 2/16/16',
	61	'description': 'Head Coach Doc Rivers addresses the media following practice.',
	62	'upload_date': '20160217',
	63	'timestamp': 1455672000,
	64	},
	65	'params': {
	66	# m3u8 download
	67	'skip_download': True,
	68	},
	69	}, {
	70	'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
	71	'info_dict': {
	72	'id': 'timberwolves',
	73	'title': 'Shootaround Access - Dec. 12 \| Andrew Wiggins',
	74	},
	75	'playlist_count': 30,
	76	'params': {
	77	# Download the whole playlist takes too long time
	78	'playlist_items': '1-30',
	79	},
	80	}, {
	81	'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
	82	'info_dict': {
	83	'id': 'Wigginsmp4',
	84	'ext': 'mp4',
	85	'title': 'Shootaround Access - Dec. 12 \| Andrew Wiggins',
	86	'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
	87	'upload_date': '20141212',
	88	'timestamp': 1418418600,
	89	},
	90	'params': {
	91	'noplaylist': True,
	92	# m3u8 download
	93	'skip_download': True,
	94	},
	95	}]
	96
	97	_PAGE_SIZE = 30
	98
	99	def _fetch_page(self, team, video_id, page):
	100	search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({
	101	'type': 'teamvideo',
	102	'start': page * self._PAGE_SIZE + 1,
	103	'npp': (page + 1) * self._PAGE_SIZE + 1,
	104	'sort': 'recent',
	105	'output': 'json',
	106	'site': team,
	107	})
	108	results = self._download_json(
	109	search_url, video_id, note='Download page %d of playlist data' % page)['results'][0]
	110	for item in results:
	111	yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
	112
	113	def _extract_playlist(self, orig_path, video_id, webpage):
	114	team = orig_path.split('/')[0]
	115
	116	if self._downloader.params.get('noplaylist'):
	117	self.to_screen('Downloading just video because of --no-playlist')
	118	video_path = self._search_regex(
	119	r'nbaVideoCore\.firstVideo\s=\s\'([^\']+)\';', webpage, 'video path')
	120	video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
	121	return self.url_result(video_url)
	122
	123	self.to_screen('Downloading playlist - add --no-playlist to just download video')
	124	playlist_title = self._og_search_title(webpage, fatal=False)
	125	entries = OnDemandPagedList(
	126	functools.partial(self._fetch_page, team, video_id),
	127	self._PAGE_SIZE, use_cache=True)
	128
	129	return self.playlist_result(entries, team, playlist_title)
	130
	131	def _real_extract(self, url):
	132	path, video_id = re.match(self._VALID_URL, url).groups()
	133	orig_path = path
	134	if path.startswith('nba/'):
	135	path = path[3:]
	136
	137	if 'video/' not in path:
	138	webpage = self._download_webpage(url, video_id)
	139	path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
	140
	141	if path == '{{id}}':
	142	return self._extract_playlist(orig_path, video_id, webpage)
	143
	144	# See prepareContentId() of pkgCvp.js
	145	if path.startswith('video/teams'):
	146	path = 'video/channels/proxy/' + path[6:]
	147
	148	video_info = self._download_xml('http://www.nba.com/%s.xml' % path, video_id)
	149	video_id = os.path.splitext(xpath_text(video_info, 'slug'))[0]
	150	title = xpath_text(video_info, 'headline')
	151	description = xpath_text(video_info, 'description')
	152	duration = parse_duration(xpath_text(video_info, 'length'))
	153	timestamp = int_or_none(xpath_attr(video_info, 'dateCreated', 'uts'))
	154
	155	thumbnails = []
	156	for image in video_info.find('images'):
	157	thumbnails.append({
	158	'id': image.attrib.get('cut'),
	159	'url': image.text,
	160	'width': int_or_none(image.attrib.get('width')),
	161	'height': int_or_none(image.attrib.get('height')),
	162	})
	163
	164	formats = []
	165	for video_file in video_info.findall('.//file'):
	166	video_url = video_file.text
	167	if video_url.startswith('/'):
	168	continue
	169	if video_url.endswith('.m3u8'):
	170	formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
	171	elif video_url.endswith('.f4m'):
	172	formats.extend(self._extract_f4m_formats(video_url + '?hdcore=3.4.1.1', video_id, f4m_id='hds', fatal=False))
	173	else:
	174	key = video_file.attrib.get('bitrate')
	175	format_info = {
	176	'format_id': key,
	177	'url': video_url,
	178	}
	179	mobj = re.search(r'(\d+)x(\d+)(?:_(\d+))?', key)
	180	if mobj:
	181	format_info.update({
	182	'width': int(mobj.group(1)),
	183	'height': int(mobj.group(2)),
	184	'tbr': int_or_none(mobj.group(3)),
	185	})
	186	formats.append(format_info)
	187	self._sort_formats(formats)
	188
	189	return {
	190	'id': video_id,
	191	'title': title,
	192	'description': description,
	193	'duration': duration,
	194	'timestamp': timestamp,
	195	'thumbnails': thumbnails,
	196	'formats': formats,
	197	}