jfr.im git - yt-dlp.git/blame_incremental - youtube

... / ...

Commit	Line	Data
	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5	import os.path
	6
	7	from .common import InfoExtractor
	8	from ..compat import compat_urlparse
	9	from ..utils import (
	10	url_basename,
	11	remove_start,
	12	)
	13
	14
	15	class DemocracynowIE(InfoExtractor):
	16	_VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P<id>[^\?]*)'
	17	IE_NAME = 'democracynow'
	18	_TESTS = [{
	19	'url': 'http://www.democracynow.org/shows/2015/7/3',
	20	'md5': '3757c182d3d84da68f5c8f506c18c196',
	21	'info_dict': {
	22	'id': '2015-0703-001',
	23	'ext': 'mp4',
	24	'title': 'Daily Show',
	25	},
	26	}, {
	27	'url': 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree',
	28	'info_dict': {
	29	'id': '2015-0703-001',
	30	'ext': 'mp4',
	31	'title': '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag',
	32	'description': 'md5:4d2bc4f0d29f5553c2210a4bc7761a21',
	33	},
	34	'params': {
	35	'skip_download': True,
	36	},
	37	}]
	38
	39	def _real_extract(self, url):
	40	display_id = self._match_id(url)
	41
	42	webpage = self._download_webpage(url, display_id)
	43
	44	json_data = self._parse_json(self._search_regex(
	45	r'<script[^>]+type="text/json"[^>]>\s({[^>]+})', webpage, 'json'),
	46	display_id)
	47
	48	title = json_data['title']
	49	formats = []
	50
	51	video_id = None
	52
	53	for key in ('file', 'audio', 'video', 'high_res_video'):
	54	media_url = json_data.get(key, '')
	55	if not media_url:
	56	continue
	57	media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url))
	58	video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn')
	59	formats.append({
	60	'url': media_url,
	61	'vcodec': 'none' if key == 'audio' else None,
	62	})
	63
	64	self._sort_formats(formats)
	65
	66	default_lang = 'en'
	67	subtitles = {}
	68
	69	def add_subtitle_item(lang, info_dict):
	70	if lang not in subtitles:
	71	subtitles[lang] = []
	72	subtitles[lang].append(info_dict)
	73
	74	# chapter_file are not subtitles
	75	if 'caption_file' in json_data:
	76	add_subtitle_item(default_lang, {
	77	'url': compat_urlparse.urljoin(url, json_data['caption_file']),
	78	})
	79
	80	for subtitle_item in json_data.get('captions', []):
	81	lang = subtitle_item.get('language', '').lower() or default_lang
	82	add_subtitle_item(lang, {
	83	'url': compat_urlparse.urljoin(url, subtitle_item['url']),
	84	})
	85
	86	description = self._og_search_description(webpage, default=None)
	87
	88	return {
	89	'id': video_id or display_id,
	90	'title': title,
	91	'description': description,
	92	'thumbnail': json_data.get('image'),
	93	'subtitles': subtitles,
	94	'formats': formats,
	95	}