# Source: youtube_dlc/extractor/youtube.py (yt-dlp, formerly youtube-dlc)
# Captured from a git-blame web view (jfr.im mirror) at merge commit
# "Merge pull request #187 from pukkandan/break-on-existing".
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28)
29from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 extract_attributes,
34 ExtractorError,
35 float_or_none,
36 get_element_by_attribute,
37 get_element_by_id,
38 int_or_none,
39 js_to_json,
40 mimetype2ext,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 remove_quotes,
46 remove_start,
47 smuggle_url,
48 str_or_none,
49 str_to_int,
50 try_get,
51 unescapeHTML,
52 unified_strdate,
53 unsmuggle_url,
54 uppercase_escape,
55 url_or_none,
56 urlencode_postdata,
57)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Entry points of Google's account sign-in pages.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    # JSON endpoints of the web sign-in flow ("GlifWebSignIn"):
    # lookup resolves a username to an account hash, challenge submits
    # the password, and _TFA_URL ({0} = "TL" token from the challenge
    # response) submits a two-factor code.
    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches ids of all playlist flavours (PL/LL/UU/... prefixes).
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
    # Captures the ytInitialData JSON blob embedded in watch/playlist pages.
    _INITIAL_DATA_RE = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    # Captures the arguments of ytcfg.set(...) calls (page configuration).
    _YTCFG_DATA_RE = r"ytcfg.set\(({.*?})\)"

    # Headers identifying us as the desktop web client.
    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        # Force the English UI via the PREF cookie so text-based
        # extraction is locale-independent.
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        # Wrap plain video ids into url_result dicts handled by YoutubeIE.
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST a sign-in step: hidden form inputs plus the positional
            # "f.req" payload expected by the endpoint. Responses are JSON
            # preceded by an anti-XSSI prefix, which transform_source strips
            # (everything before the first '[').
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Positional request payload for the account lookup step; the
        # array layout mirrors what the web client sends (mostly opaque).
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        # [0][2] of the lookup response holds the account hash used by all
        # subsequent steps (per observed response layout).
        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        # Password-submission payload, again positional/opaque.
        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        # A populated [0][5] entry signals a login error (e.g. bad password).
        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        # A nested challenge entry means additional verification is required
        # (TFA or an interactive challenge we cannot solve).
        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                # "TL" token required by the TFA submission URL.
                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Strip the optional "G-" prefix from SMS-style codes.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Human-readable explanations for challenges we cannot
                # solve programmatically; the user must log in via browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        # Fetching the CheckCookie URL finalizes the session cookies; the
        # redirect target confirms whether login actually succeeded.
        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Ensure a private copy of the query dict is passed down so the
        # caller's dict is never mutated by the base implementation.
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON embedded in a page.

        Returns the parsed dict, or None if the blob is absent or invalid.
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        # No downloader means we cannot set cookies or report anything.
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
299
300
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Shared machinery for list-like pages (playlists, channel tabs).

    Subclasses define _is_entry() to recognize a renderer dict and
    _process_entries() to turn recognized renderers into url_results.
    """

    def _find_entries_in_json(self, extracted):
        """Recursively scan a parsed JSON tree for entry renderers.

        Returns (entries, continuation) where entries is a list of dicts
        accepted by self._is_entry() and continuation is the first dict
        containing a 'continuationCommand' key (or None).
        """
        entries = []
        c = {}

        def _real_find(obj):
            # Strings and None are leaves; nothing to recurse into.
            # NOTE(review): isinstance(obj, str) — under Python 2 JSON
            # strings are unicode, not str; they still fall through all
            # branches below harmlessly.
            if obj is None or isinstance(obj, str):
                return

            if type(obj) is list:
                for elem in obj:
                    _real_find(elem)

            if type(obj) is dict:
                # A recognized entry is collected whole; do not descend
                # further into it.
                if self._is_entry(obj):
                    entries.append(obj)
                    return

                # First continuation token found wins (stored in the
                # closed-over dict c).
                if 'continuationCommand' in obj:
                    c['continuation'] = obj
                    return

                for _, o in obj.items():
                    _real_find(o)

        _real_find(extracted)

        return entries, try_get(c, lambda x: x["continuation"])

    def _entries(self, page, playlist_id, max_pages=None):
        """Generate entries from *page*, following InnerTube continuations.

        page -- HTML of the first list page
        playlist_id -- id used for logging/error reporting
        max_pages -- optional cap on the number of pages fetched
        """
        # `seen` is threaded through to _process_entries; the subclasses
        # in this file do not currently use it.
        seen = []

        # Collect ytcfg.set(...) config fragments; INNERTUBE_API_KEY and
        # INNERTUBE_CONTEXT from here are needed for continuation requests.
        yt_conf = {}
        for m in re.finditer(self._YTCFG_DATA_RE, page):
            parsed = self._parse_json(m.group(1), playlist_id,
                                      transform_source=js_to_json, fatal=False)
            if parsed:
                yt_conf.update(parsed)

        data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)

        for page_num in range(1, max_pages + 1) if max_pages is not None else itertools.count(1):
            entries, continuation = self._find_entries_in_json(data_json)
            processed = self._process_entries(entries, seen)

            if not processed:
                break
            for entry in processed:
                yield entry

            # Without a continuation token or the API config we cannot
            # fetch further pages.
            if not continuation or not yt_conf:
                break
            continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
            continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
            if not continuation_token or not continuation_url:
                break

            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    data_json = self._download_json(
                        'https://www.youtube.com%s' % continuation_url,
                        playlist_id,
                        'Downloading continuation page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),

                        transform_source=uppercase_escape,
                        query={
                            'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
                        },
                        data=str(json.dumps({
                            'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
                            'continuation': continuation_token
                        })).encode(encoding='UTF-8', errors='strict'),
                        headers={
                            'Content-Type': 'application/json'
                        }
                    )
                    break
                except ExtractorError as e:
                    # Only retry on server-side errors; anything else (and
                    # exhausted retries) is re-raised.
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise

    def _extract_title(self, renderer):
        """Return a renderer's title from either of its two known shapes."""
        title = try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        if title:
            return title
        return try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
395
396
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """List extractor whose entries are individual videos."""

    def _is_entry(self, obj):
        # A video renderer is any dict carrying a 'videoId' key.
        return 'videoId' in obj

    def _process_entries(self, entries, seen):
        """Yield one url_result per unique video id, keeping first-seen order.

        If the same id appears more than once, the first non-empty title
        encountered for it is used.
        """
        found_ids = []
        found_titles = []
        for renderer in entries:
            vid = try_get(renderer, lambda x: x['videoId'])
            title = self._extract_title(renderer)

            # Skip anything that is not a videoRenderer or whose title
            # extraction failed.
            if vid is None or title is None:
                continue

            title = title.strip()

            if vid in found_ids:
                pos = found_ids.index(vid)
                # Upgrade an empty title with a later non-empty one.
                if title and not found_titles[pos]:
                    found_titles[pos] = title
            else:
                found_ids.append(vid)
                found_titles.append(title)

        for vid, title in zip(found_ids, found_titles):
            yield self.url_result(vid, 'Youtube', vid, title)
424
425
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """List extractor whose entries are playlists rather than videos."""

    def _is_entry(self, obj):
        # A playlist renderer is any dict carrying a 'playlistId' key.
        return 'playlistId' in obj

    def _process_entries(self, entries, seen):
        """Yield a YoutubePlaylist url_result per unique playlist id."""
        unique_ids = orderedSet(
            try_get(r, lambda x: x['playlistId']) for r in entries)
        for playlist_id in unique_ids:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                'YoutubePlaylist')

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        # Title is best-effort; the playlist is still returned without it.
        title = self._og_search_title(webpage, fatal=False)
        entries = self._entries(webpage, playlist_id)
        return self.playlist_result(entries, playlist_id, title)
441
442
443class YoutubeIE(YoutubeBaseInfoExtractor):
444 IE_DESC = 'YouTube.com'
445 _VALID_URL = r"""(?x)^
446 (
447 (?:https?://|//) # http(s):// or protocol-independent URL
448 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
449 (?:www\.)?deturl\.com/www\.youtube\.com/|
450 (?:www\.)?pwnyoutube\.com/|
451 (?:www\.)?hooktube\.com/|
452 (?:www\.)?yourepeat\.com/|
453 tube\.majestyc\.net/|
454 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
455 (?:(?:www|dev)\.)?invidio\.us/|
456 (?:(?:www|no)\.)?invidiou\.sh/|
457 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
458 (?:www\.)?invidious\.kabi\.tk/|
459 (?:www\.)?invidious\.13ad\.de/|
460 (?:www\.)?invidious\.mastodon\.host/|
461 (?:www\.)?invidious\.nixnet\.xyz/|
462 (?:www\.)?invidious\.drycat\.fr/|
463 (?:www\.)?tube\.poal\.co/|
464 (?:www\.)?vid\.wxzm\.sx/|
465 (?:www\.)?yewtu\.be/|
466 (?:www\.)?yt\.elukerio\.org/|
467 (?:www\.)?yt\.lelux\.fi/|
468 (?:www\.)?invidious\.ggc-project\.de/|
469 (?:www\.)?yt\.maisputain\.ovh/|
470 (?:www\.)?invidious\.13ad\.de/|
471 (?:www\.)?invidious\.toot\.koeln/|
472 (?:www\.)?invidious\.fdn\.fr/|
473 (?:www\.)?watch\.nettohikari\.com/|
474 (?:www\.)?kgg2m7yk5aybusll\.onion/|
475 (?:www\.)?qklhadlycap4cnod\.onion/|
476 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
477 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
478 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
479 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
480 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
481 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
482 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
483 (?:.*?\#/)? # handle anchor (#/) redirect urls
484 (?: # the various things that can precede the ID:
485 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
486 |(?: # or the v= param in all its forms
487 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
488 (?:\?|\#!?) # the params delimiter ? or # or #!
489 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
490 v=
491 )
492 ))
493 |(?:
494 youtu\.be| # just youtu.be/xxxx
495 vid\.plus| # or vid.plus/xxxx
496 zwearz\.com/watch| # or zwearz.com/watch/xxxx
497 )/
498 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
499 )
500 )? # all until now is optional -> you can pass the naked ID
501 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
502 (?!.*?\blist=
503 (?:
504 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
505 WL # WL are handled by the watch later IE
506 )
507 )
508 (?(1).+)? # if we found the ID, everything can follow
509 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
510 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
511 _PLAYER_INFO_RE = (
512 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
513 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
514 )
515 _formats = {
516 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
517 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
518 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
519 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
520 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
521 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
522 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
523 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
524 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
525 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
526 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
527 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
528 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
529 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
530 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
531 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
532 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
533 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
534
535
536 # 3D videos
537 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
538 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
539 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
540 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
541 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
542 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
543 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
544
545 # Apple HTTP Live Streaming
546 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
547 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
548 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
549 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
550 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
551 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
552 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
553 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
554
555 # DASH mp4 video
556 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
557 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
558 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
559 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
560 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
561 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
562 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
563 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
564 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
565 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
566 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
567 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
568
569 # Dash mp4 audio
570 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
571 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
572 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
573 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
574 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
575 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
576 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
577
578 # Dash webm
579 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
580 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
581 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
582 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
583 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
584 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
585 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
586 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
587 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
588 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
589 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
590 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
591 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
592 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
593 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
594 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
595 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
596 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
597 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
598 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
599 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
600 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
601
602 # Dash webm audio
603 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
604 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
605
606 # Dash webm audio with opus inside
607 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
608 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
609 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
610
611 # RTMP (unnamed)
612 '_rtmp': {'protocol': 'rtmp'},
613
614 # av01 video only formats sometimes served with "unknown" codecs
615 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
616 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
617 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
618 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
619 }
620 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
621
622 _GEO_BYPASS = False
623
624 IE_NAME = 'youtube'
625 _TESTS = [
626 {
627 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
628 'info_dict': {
629 'id': 'BaW_jenozKc',
630 'ext': 'mp4',
631 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
632 'uploader': 'Philipp Hagemeister',
633 'uploader_id': 'phihag',
634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
635 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
636 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
637 'upload_date': '20121002',
638 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
639 'categories': ['Science & Technology'],
640 'tags': ['youtube-dl'],
641 'duration': 10,
642 'view_count': int,
643 'like_count': int,
644 'dislike_count': int,
645 'start_time': 1,
646 'end_time': 9,
647 }
648 },
649 {
650 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
651 'note': 'Embed-only video (#1746)',
652 'info_dict': {
653 'id': 'yZIXLfi8CZQ',
654 'ext': 'mp4',
655 'upload_date': '20120608',
656 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
657 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
658 'uploader': 'SET India',
659 'uploader_id': 'setindia',
660 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
661 'age_limit': 18,
662 }
663 },
664 {
665 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
666 'note': 'Use the first video ID in the URL',
667 'info_dict': {
668 'id': 'BaW_jenozKc',
669 'ext': 'mp4',
670 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
671 'uploader': 'Philipp Hagemeister',
672 'uploader_id': 'phihag',
673 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
674 'upload_date': '20121002',
675 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
676 'categories': ['Science & Technology'],
677 'tags': ['youtube-dl'],
678 'duration': 10,
679 'view_count': int,
680 'like_count': int,
681 'dislike_count': int,
682 },
683 'params': {
684 'skip_download': True,
685 },
686 },
687 {
688 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
689 'note': '256k DASH audio (format 141) via DASH manifest',
690 'info_dict': {
691 'id': 'a9LDPn-MO4I',
692 'ext': 'm4a',
693 'upload_date': '20121002',
694 'uploader_id': '8KVIDEO',
695 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
696 'description': '',
697 'uploader': '8KVIDEO',
698 'title': 'UHDTV TEST 8K VIDEO.mp4'
699 },
700 'params': {
701 'youtube_include_dash_manifest': True,
702 'format': '141',
703 },
704 'skip': 'format 141 not served anymore',
705 },
706 # Controversy video
707 {
708 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
709 'info_dict': {
710 'id': 'T4XJQO3qol8',
711 'ext': 'mp4',
712 'duration': 219,
713 'upload_date': '20100909',
714 'uploader': 'Amazing Atheist',
715 'uploader_id': 'TheAmazingAtheist',
716 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
717 'title': 'Burning Everyone\'s Koran',
718 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
719 }
720 },
721 # Normal age-gate video (embed allowed)
722 {
723 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
724 'info_dict': {
725 'id': 'HtVdAasjOgU',
726 'ext': 'mp4',
727 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
728 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
729 'duration': 142,
730 'uploader': 'The Witcher',
731 'uploader_id': 'WitcherGame',
732 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
733 'upload_date': '20140605',
734 'age_limit': 18,
735 },
736 },
737 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
738 {
739 'url': 'lqQg6PlCWgI',
740 'info_dict': {
741 'id': 'lqQg6PlCWgI',
742 'ext': 'mp4',
743 'duration': 6085,
744 'upload_date': '20150827',
745 'uploader_id': 'olympic',
746 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
747 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
748 'uploader': 'Olympic',
749 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
750 },
751 'params': {
752 'skip_download': 'requires avconv',
753 }
754 },
755 # Non-square pixels
756 {
757 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
758 'info_dict': {
759 'id': '_b-2C3KPAM0',
760 'ext': 'mp4',
761 'stretched_ratio': 16 / 9.,
762 'duration': 85,
763 'upload_date': '20110310',
764 'uploader_id': 'AllenMeow',
765 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
766 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
767 'uploader': '孫ᄋᄅ',
768 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
769 },
770 },
771 # url_encoded_fmt_stream_map is empty string
772 {
773 'url': 'qEJwOuvDf7I',
774 'info_dict': {
775 'id': 'qEJwOuvDf7I',
776 'ext': 'webm',
777 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
778 'description': '',
779 'upload_date': '20150404',
780 'uploader_id': 'spbelect',
781 'uploader': 'Наблюдатели Петербурга',
782 },
783 'params': {
784 'skip_download': 'requires avconv',
785 },
786 'skip': 'This live event has ended.',
787 },
788 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
789 {
790 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
791 'info_dict': {
792 'id': 'FIl7x6_3R5Y',
793 'ext': 'webm',
794 'title': 'md5:7b81415841e02ecd4313668cde88737a',
795 'description': 'md5:116377fd2963b81ec4ce64b542173306',
796 'duration': 220,
797 'upload_date': '20150625',
798 'uploader_id': 'dorappi2000',
799 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
800 'uploader': 'dorappi2000',
801 'formats': 'mincount:31',
802 },
803 'skip': 'not actual anymore',
804 },
805 # DASH manifest with segment_list
806 {
807 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
808 'md5': '8ce563a1d667b599d21064e982ab9e31',
809 'info_dict': {
810 'id': 'CsmdDsKjzN8',
811 'ext': 'mp4',
812 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
813 'uploader': 'Airtek',
814 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
815 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
816 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
817 },
818 'params': {
819 'youtube_include_dash_manifest': True,
820 'format': '135', # bestvideo
821 },
822 'skip': 'This live event has ended.',
823 },
824 {
825 # Multifeed videos (multiple cameras), URL is for Main Camera
826 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
827 'info_dict': {
828 'id': 'jqWvoWXjCVs',
829 'title': 'teamPGP: Rocket League Noob Stream',
830 'description': 'md5:dc7872fb300e143831327f1bae3af010',
831 },
832 'playlist': [{
833 'info_dict': {
834 'id': 'jqWvoWXjCVs',
835 'ext': 'mp4',
836 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
837 'description': 'md5:dc7872fb300e143831327f1bae3af010',
838 'duration': 7335,
839 'upload_date': '20150721',
840 'uploader': 'Beer Games Beer',
841 'uploader_id': 'beergamesbeer',
842 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
843 'license': 'Standard YouTube License',
844 },
845 }, {
846 'info_dict': {
847 'id': '6h8e8xoXJzg',
848 'ext': 'mp4',
849 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
850 'description': 'md5:dc7872fb300e143831327f1bae3af010',
851 'duration': 7337,
852 'upload_date': '20150721',
853 'uploader': 'Beer Games Beer',
854 'uploader_id': 'beergamesbeer',
855 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
856 'license': 'Standard YouTube License',
857 },
858 }, {
859 'info_dict': {
860 'id': 'PUOgX5z9xZw',
861 'ext': 'mp4',
862 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
863 'description': 'md5:dc7872fb300e143831327f1bae3af010',
864 'duration': 7337,
865 'upload_date': '20150721',
866 'uploader': 'Beer Games Beer',
867 'uploader_id': 'beergamesbeer',
868 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
869 'license': 'Standard YouTube License',
870 },
871 }, {
872 'info_dict': {
873 'id': 'teuwxikvS5k',
874 'ext': 'mp4',
875 'title': 'teamPGP: Rocket League Noob Stream (zim)',
876 'description': 'md5:dc7872fb300e143831327f1bae3af010',
877 'duration': 7334,
878 'upload_date': '20150721',
879 'uploader': 'Beer Games Beer',
880 'uploader_id': 'beergamesbeer',
881 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
882 'license': 'Standard YouTube License',
883 },
884 }],
885 'params': {
886 'skip_download': True,
887 },
888 'skip': 'This video is not available.',
889 },
890 {
891 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
892 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
893 'info_dict': {
894 'id': 'gVfLd0zydlo',
895 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
896 },
897 'playlist_count': 2,
898 'skip': 'Not multifeed anymore',
899 },
900 {
901 'url': 'https://vid.plus/FlRa-iH7PGw',
902 'only_matching': True,
903 },
904 {
905 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
906 'only_matching': True,
907 },
908 {
909 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
910 # Also tests cut-off URL expansion in video description (see
911 # https://github.com/ytdl-org/youtube-dl/issues/1892,
912 # https://github.com/ytdl-org/youtube-dl/issues/8164)
913 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
914 'info_dict': {
915 'id': 'lsguqyKfVQg',
916 'ext': 'mp4',
917 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
918 'alt_title': 'Dark Walk - Position Music',
919 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
920 'duration': 133,
921 'upload_date': '20151119',
922 'uploader_id': 'IronSoulElf',
923 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
924 'uploader': 'IronSoulElf',
925 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
926 'track': 'Dark Walk - Position Music',
927 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
928 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
929 },
930 'params': {
931 'skip_download': True,
932 },
933 },
934 {
935 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
936 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
937 'only_matching': True,
938 },
939 {
940 # Video with yt:stretch=17:0
941 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
942 'info_dict': {
943 'id': 'Q39EVAstoRM',
944 'ext': 'mp4',
945 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
946 'description': 'md5:ee18a25c350637c8faff806845bddee9',
947 'upload_date': '20151107',
948 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
949 'uploader': 'CH GAMER DROID',
950 },
951 'params': {
952 'skip_download': True,
953 },
954 'skip': 'This video does not exist.',
955 },
956 {
957 # Video licensed under Creative Commons
958 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
959 'info_dict': {
960 'id': 'M4gD1WSo5mA',
961 'ext': 'mp4',
962 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
963 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
964 'duration': 721,
965 'upload_date': '20150127',
966 'uploader_id': 'BerkmanCenter',
967 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
968 'uploader': 'The Berkman Klein Center for Internet & Society',
969 'license': 'Creative Commons Attribution license (reuse allowed)',
970 },
971 'params': {
972 'skip_download': True,
973 },
974 },
975 {
976 # Channel-like uploader_url
977 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
978 'info_dict': {
979 'id': 'eQcmzGIKrzg',
980 'ext': 'mp4',
981 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
982 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
983 'duration': 4060,
984 'upload_date': '20151119',
985 'uploader': 'Bernie Sanders',
986 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
987 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
988 'license': 'Creative Commons Attribution license (reuse allowed)',
989 },
990 'params': {
991 'skip_download': True,
992 },
993 },
994 {
995 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
996 'only_matching': True,
997 },
998 {
999 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1000 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1001 'only_matching': True,
1002 },
1003 {
1004 # Rental video preview
1005 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1006 'info_dict': {
1007 'id': 'uGpuVWrhIzE',
1008 'ext': 'mp4',
1009 'title': 'Piku - Trailer',
1010 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1011 'upload_date': '20150811',
1012 'uploader': 'FlixMatrix',
1013 'uploader_id': 'FlixMatrixKaravan',
1014 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1015 'license': 'Standard YouTube License',
1016 },
1017 'params': {
1018 'skip_download': True,
1019 },
1020 'skip': 'This video is not available.',
1021 },
1022 {
1023 # YouTube Red video with episode data
1024 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1025 'info_dict': {
1026 'id': 'iqKdEhx-dD4',
1027 'ext': 'mp4',
1028 'title': 'Isolation - Mind Field (Ep 1)',
1029 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1030 'duration': 2085,
1031 'upload_date': '20170118',
1032 'uploader': 'Vsauce',
1033 'uploader_id': 'Vsauce',
1034 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1035 'series': 'Mind Field',
1036 'season_number': 1,
1037 'episode_number': 1,
1038 },
1039 'params': {
1040 'skip_download': True,
1041 },
1042 'expected_warnings': [
1043 'Skipping DASH manifest',
1044 ],
1045 },
1046 {
1047 # The following content has been identified by the YouTube community
1048 # as inappropriate or offensive to some audiences.
1049 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1050 'info_dict': {
1051 'id': '6SJNVb0GnPI',
1052 'ext': 'mp4',
1053 'title': 'Race Differences in Intelligence',
1054 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1055 'duration': 965,
1056 'upload_date': '20140124',
1057 'uploader': 'New Century Foundation',
1058 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1059 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1060 },
1061 'params': {
1062 'skip_download': True,
1063 },
1064 },
1065 {
1066 # itag 212
1067 'url': '1t24XAntNCY',
1068 'only_matching': True,
1069 },
1070 {
1071 # geo restricted to JP
1072 'url': 'sJL6WA-aGkQ',
1073 'only_matching': True,
1074 },
1075 {
1076 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1077 'only_matching': True,
1078 },
1079 {
1080 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1081 'only_matching': True,
1082 },
1083 {
1084 # DRM protected
1085 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1086 'only_matching': True,
1087 },
1088 {
1089 # Video with unsupported adaptive stream type formats
1090 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1091 'info_dict': {
1092 'id': 'Z4Vy8R84T1U',
1093 'ext': 'mp4',
1094 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1095 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1096 'duration': 433,
1097 'upload_date': '20130923',
1098 'uploader': 'Amelia Putri Harwita',
1099 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1101 'formats': 'maxcount:10',
1102 },
1103 'params': {
1104 'skip_download': True,
1105 'youtube_include_dash_manifest': False,
1106 },
1107 'skip': 'not actual anymore',
1108 },
1109 {
1110 # Youtube Music Auto-generated description
1111 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1112 'info_dict': {
1113 'id': 'MgNrAu2pzNs',
1114 'ext': 'mp4',
1115 'title': 'Voyeur Girl',
1116 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1117 'upload_date': '20190312',
1118 'uploader': 'Stephen - Topic',
1119 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1120 'artist': 'Stephen',
1121 'track': 'Voyeur Girl',
1122 'album': 'it\'s too much love to know my dear',
1123 'release_date': '20190313',
1124 'release_year': 2019,
1125 },
1126 'params': {
1127 'skip_download': True,
1128 },
1129 },
1130 {
1131 # Youtube Music Auto-generated description
1132 # Retrieve 'artist' field from 'Artist:' in video description
1133 # when it is present on youtube music video
1134 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1135 'info_dict': {
1136 'id': 'k0jLE7tTwjY',
1137 'ext': 'mp4',
1138 'title': 'Latch Feat. Sam Smith',
1139 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1140 'upload_date': '20150110',
1141 'uploader': 'Various Artists - Topic',
1142 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1143 'artist': 'Disclosure',
1144 'track': 'Latch Feat. Sam Smith',
1145 'album': 'Latch Featuring Sam Smith',
1146 'release_date': '20121008',
1147 'release_year': 2012,
1148 },
1149 'params': {
1150 'skip_download': True,
1151 },
1152 },
1153 {
1154 # Youtube Music Auto-generated description
1155 # handle multiple artists on youtube music video
1156 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1157 'info_dict': {
1158 'id': '74qn0eJSjpA',
1159 'ext': 'mp4',
1160 'title': 'Eastside',
1161 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1162 'upload_date': '20180710',
1163 'uploader': 'Benny Blanco - Topic',
1164 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1165 'artist': 'benny blanco, Halsey, Khalid',
1166 'track': 'Eastside',
1167 'album': 'Eastside',
1168 'release_date': '20180713',
1169 'release_year': 2018,
1170 },
1171 'params': {
1172 'skip_download': True,
1173 },
1174 },
1175 {
1176 # Youtube Music Auto-generated description
1177 # handle youtube music video with release_year and no release_date
1178 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1179 'info_dict': {
1180 'id': '-hcAI0g-f5M',
1181 'ext': 'mp4',
1182 'title': 'Put It On Me',
1183 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1184 'upload_date': '20180426',
1185 'uploader': 'Matt Maeson - Topic',
1186 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1187 'artist': 'Matt Maeson',
1188 'track': 'Put It On Me',
1189 'album': 'The Hearse',
1190 'release_date': None,
1191 'release_year': 2018,
1192 },
1193 'params': {
1194 'skip_download': True,
1195 },
1196 },
1197 {
1198 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1199 'only_matching': True,
1200 },
1201 {
1202 # invalid -> valid video id redirection
1203 'url': 'DJztXj2GPfl',
1204 'info_dict': {
1205 'id': 'DJztXj2GPfk',
1206 'ext': 'mp4',
1207 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1208 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1209 'upload_date': '20090125',
1210 'uploader': 'Prochorowka',
1211 'uploader_id': 'Prochorowka',
1212 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1213 'artist': 'Panjabi MC',
1214 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1215 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1216 },
1217 'params': {
1218 'skip_download': True,
1219 },
1220 },
1221 {
1222 # empty description results in an empty string
1223 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1224 'info_dict': {
1225 'id': 'x41yOUIvK2k',
1226 'ext': 'mp4',
1227 'title': 'IMG 3456',
1228 'description': '',
1229 'upload_date': '20170613',
1230 'uploader_id': 'ElevageOrVert',
1231 'uploader': 'ElevageOrVert',
1232 },
1233 'params': {
1234 'skip_download': True,
1235 },
1236 },
1237 ]
1238
1239 def __init__(self, *args, **kwargs):
1240 super(YoutubeIE, self).__init__(*args, **kwargs)
1241 self._player_cache = {}
1242
1243 def report_video_info_webpage_download(self, video_id):
1244 """Report attempt to download video info webpage."""
1245 self.to_screen('%s: Downloading video info webpage' % video_id)
1246
1247 def report_information_extraction(self, video_id):
1248 """Report attempt to extract video information."""
1249 self.to_screen('%s: Extracting video information' % video_id)
1250
1251 def report_unavailable_format(self, video_id, format):
1252 """Report extracted video URL."""
1253 self.to_screen('%s: Format %s not available' % (video_id, format))
1254
1255 def report_rtmp_download(self):
1256 """Indicate the download will use the RTMP protocol."""
1257 self.to_screen('RTMP download detected')
1258
1259 def _signature_cache_id(self, example_sig):
1260 """ Return a string representation of a signature """
1261 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1262
1263 @classmethod
1264 def _extract_player_info(cls, player_url):
1265 for player_re in cls._PLAYER_INFO_RE:
1266 id_m = re.search(player_re, player_url)
1267 if id_m:
1268 break
1269 else:
1270 raise ExtractorError('Cannot identify player %r' % player_url)
1271 return id_m.group('ext'), id_m.group('id')
1272
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build the signature-decryption function for the given player.

        Returns a callable mapping an encrypted signature string to its
        decrypted form.  The derived character-index spec is persisted in the
        'youtube-sigfuncs' filesystem cache, keyed by player type/id and the
        shape of example_sig, so the player need not be re-downloaded.
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source indices (a pure character
            # reordering), so the function is rebuilt without any download.
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Run the extracted function on a marker string whose characters are
        # chr(0..n-1); the output then reveals, for every output position,
        # which input index it came from.  That index list is the cached spec.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1312
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Runs func on a marker string, records the resulting index permutation
        and compresses consecutive runs into slice expressions for human
        inspection (used by the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression, omitting defaults (0, end, 1)
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, emitting a slice for each run of
            # indices advancing by a constant step of +1 or -1, and a single
            # 's[i]' item otherwise.
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the still-open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1351
    def _parse_sig_js(self, jscode):
        """Locate the signature-decryption function inside JS player code.

        Tries progressively looser regexes for the function name, then
        extracts it with JSInterpreter.  Returns a callable applying that
        function to a single signature string.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])
1372
1373 def _parse_sig_swf(self, file_contents):
1374 swfi = SWFInterpreter(file_contents)
1375 TARGET_CLASSNAME = 'SignatureDecipher'
1376 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1377 initial_function = swfi.extract_function(searched_class, 'decipher')
1378 return lambda s: initial_function([s])
1379
1380 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1381 """Turn the encrypted s field into a working signature"""
1382
1383 if player_url is None:
1384 raise ExtractorError('Cannot decrypt signature without player_url')
1385
1386 if player_url.startswith('//'):
1387 player_url = 'https:' + player_url
1388 elif not re.match(r'https?://', player_url):
1389 player_url = compat_urlparse.urljoin(
1390 'https://www.youtube.com', player_url)
1391 try:
1392 player_id = (player_url, self._signature_cache_id(s))
1393 if player_id not in self._player_cache:
1394 func = self._extract_signature_function(
1395 video_id, player_url, s
1396 )
1397 self._player_cache[player_id] = func
1398 func = self._player_cache[player_id]
1399 if self._downloader.params.get('youtube_print_sig_code'):
1400 self._print_sig_code(func, s)
1401 return func(s)
1402 except Exception as e:
1403 tb = traceback.format_exc()
1404 raise ExtractorError(
1405 'Signature extraction failed: ' + tb, cause=e)
1406
    def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
        """Fetch the manual subtitle track list for video_id.

        Returns a dict mapping language code -> list of subtitle format dicts;
        a synthetic 'live_chat' entry is added when a live chat replay exists.
        Returns {} (after a warning) when the track list cannot be downloaded
        or no subtitles are available.
        """
        try:
            subs_doc = self._download_xml(
                'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                video_id, note=False)
        except ExtractorError as err:
            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
            return {}

        sub_lang_list = {}
        for track in subs_doc.findall('track'):
            lang = track.attrib['lang_code']
            # Keep only the first track seen per language
            if lang in sub_lang_list:
                continue
            sub_formats = []
            for ext in self._SUBTITLE_FORMATS:
                params = compat_urllib_parse_urlencode({
                    'lang': lang,
                    'v': video_id,
                    'fmt': ext,
                    'name': track.attrib['name'].encode('utf-8'),
                })
                sub_formats.append({
                    'url': 'https://www.youtube.com/api/timedtext?' + params,
                    'ext': ext,
                })
            sub_lang_list[lang] = sub_formats
        if has_live_chat_replay:
            # Pseudo-subtitle handled by the youtube_live_chat_replay downloader
            sub_lang_list['live_chat'] = [
                {
                    'video_id': video_id,
                    'ext': 'json',
                    'protocol': 'youtube_live_chat_replay',
                },
            ]
        if not sub_lang_list:
            self._downloader.report_warning('video doesn\'t have subtitles')
            return {}
        return sub_lang_list
1446
    def _get_ytplayer_config(self, video_id, webpage):
        """Extract the ytplayer.config / ytInitialPlayerResponse JSON from the
        watch page.  Returns the parsed dict, or None when no pattern matches
        or the JSON cannot be parsed."""
        patterns = (
            # User data may contain arbitrary character sequences that may affect
            # JSON extraction with regex, e.g. when '};' is contained the second
            # regex won't capture the whole JSON. Yet working around by trying more
            # concrete regex first keeping in mind proper quoted string handling
            # to be implemented in future that will replace this workaround (see
            # https://github.com/ytdl-org/youtube-dl/issues/7468,
            # https://github.com/ytdl-org/youtube-dl/pull/7599)
            r';ytplayer\.config\s*=\s*({.+?});ytplayer',
            r';ytplayer\.config\s*=\s*({.+?});',
            r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
        )
        config = self._search_regex(
            patterns, webpage, 'ytplayer.config', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)
1465
1466 def _get_music_metadata_from_yt_initial(self, yt_initial):
1467 music_metadata = []
1468 key_map = {
1469 'Album': 'album',
1470 'Artist': 'artist',
1471 'Song': 'track'
1472 }
1473 contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1474 if type(contents) is list:
1475 for content in contents:
1476 music_track = {}
1477 if type(content) is not dict:
1478 continue
1479 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1480 if type(videoSecondaryInfoRenderer) is not dict:
1481 continue
1482 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1483 if type(rows) is not list:
1484 continue
1485 for row in rows:
1486 metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1487 if type(metadataRowRenderer) is not dict:
1488 continue
1489 key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1490 value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1491 try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1492 if type(key) is not str or type(value) is not str:
1493 continue
1494 if key in key_map:
1495 if key_map[key] in music_track:
1496 # we've started on a new track
1497 music_metadata.append(music_track)
1498 music_track = {}
1499 music_track[key_map[key]] = value
1500 if len(music_track.keys()):
1501 music_metadata.append(music_track)
1502 return music_metadata
1503
    def _get_automatic_captions(self, video_id, webpage):
        """Extract automatic (ASR / translated) caption tracks for video_id.

        We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.  Returns a dict mapping language
        code -> list of caption format dicts, or {} (with a warning) when no
        automatic captions can be found.
        """
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            # Oldest scheme: an explicit ttsurl in the player args
            if "args" in player_config and "ttsurl" in player_config["args"]:
                args = player_config['args']
                caption_url = args['ttsurl']
                timestamp = args['timestamp']

                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build per-language format lists by rewriting the query
                # string of one caption base URL
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if "args" in player_config:
                player_response = player_config["args"].get('player_response')
            else:
                # New player system (ytInitialPlayerResponse) as of October 2020
                player_response = player_config

            if player_response:
                if isinstance(player_response, compat_str):
                    player_response = self._parse_json(
                        player_response, video_id, fatal=False)

                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                caption_tracks = renderer['captionTracks']
                for caption_track in caption_tracks:
                    if 'kind' not in caption_track:
                        # not an automatic transcription
                        continue
                    base_url = caption_track['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

                self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
                return {}

            if "args" in player_config:
                args = player_config["args"]

                # Some videos don't provide ttsurl but rather caption_tracks and
                # caption_translation_languages (e.g. 20LmZk1hakA)
                # Does not used anymore as of 22.06.2017
                caption_tracks = args['caption_tracks']
                caption_translation_languages = args['caption_translation_languages']
                caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
                sub_lang_list = []
                for lang in caption_translation_languages.split(','):
                    lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                    sub_lang = lang_qs.get('lc', [None])[0]
                    if sub_lang:
                        sub_lang_list.append(sub_lang)
                return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1623
    def _mark_watched(self, video_id, video_info, player_response):
        """Ping YouTube's videostats playback URL so the video is marked as
        watched on the logged-in account.  Best-effort: silently returns when
        no playback-tracking URL is available, and the final request is
        non-fatal."""
        playback_url = url_or_none(try_get(
            player_response,
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
            video_info, lambda x: x['videostats_playback_base_url'][0]))
        if not playback_url:
            return
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
        })
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
1649
    @staticmethod
    def _extract_urls(webpage):
        """Return a list of YouTube embed URLs/ids found in an arbitrary webpage.

        Covers iframe/object/embed/SWFObject players, lazyYT embeds and the
        Wordpress "YouTube Video Importer" plugin markup.
        """
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1681
1682 @staticmethod
1683 def _extract_url(webpage):
1684 urls = YoutubeIE._extract_urls(webpage)
1685 return urls[0] if urls else None
1686
1687 @classmethod
1688 def extract_id(cls, url):
1689 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1690 if mobj is None:
1691 raise ExtractorError('Invalid URL: %s' % url)
1692 video_id = mobj.group(2)
1693 return video_id
1694
1695 def _extract_chapters_from_json(self, webpage, video_id, duration):
1696 if not webpage:
1697 return
1698 initial_data = self._parse_json(
1699 self._search_regex(
1700 r'window\["ytInitialData"\] = (.+);\n', webpage,
1701 'player args', default='{}'),
1702 video_id, fatal=False)
1703 if not initial_data or not isinstance(initial_data, dict):
1704 return
1705 chapters_list = try_get(
1706 initial_data,
1707 lambda x: x['playerOverlays']
1708 ['playerOverlayRenderer']
1709 ['decoratedPlayerBarRenderer']
1710 ['decoratedPlayerBarRenderer']
1711 ['playerBar']
1712 ['chapteredPlayerBarRenderer']
1713 ['chapters'],
1714 list)
1715 if not chapters_list:
1716 return
1717
1718 def chapter_time(chapter):
1719 return float_or_none(
1720 try_get(
1721 chapter,
1722 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1723 int),
1724 scale=1000)
1725 chapters = []
1726 for next_num, chapter in enumerate(chapters_list, start=1):
1727 start_time = chapter_time(chapter)
1728 if start_time is None:
1729 continue
1730 end_time = (chapter_time(chapters_list[next_num])
1731 if next_num < len(chapters_list) else duration)
1732 if end_time is None:
1733 continue
1734 title = try_get(
1735 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1736 compat_str)
1737 chapters.append({
1738 'start_time': start_time,
1739 'end_time': end_time,
1740 'title': title,
1741 })
1742 return chapters
1743
1744 @staticmethod
1745 def _extract_chapters_from_description(description, duration):
1746 if not description:
1747 return None
1748 chapter_lines = re.findall(
1749 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1750 description)
1751 if not chapter_lines:
1752 return None
1753 chapters = []
1754 for next_num, (chapter_line, time_point) in enumerate(
1755 chapter_lines, start=1):
1756 start_time = parse_duration(time_point)
1757 if start_time is None:
1758 continue
1759 if start_time > duration:
1760 break
1761 end_time = (duration if next_num == len(chapter_lines)
1762 else parse_duration(chapter_lines[next_num][1]))
1763 if end_time is None:
1764 continue
1765 if end_time > duration:
1766 end_time = duration
1767 if start_time > end_time:
1768 break
1769 chapter_title = re.sub(
1770 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1771 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1772 chapters.append({
1773 'start_time': start_time,
1774 'end_time': end_time,
1775 'title': chapter_title,
1776 })
1777 return chapters
1778
1779 def _extract_chapters(self, webpage, description, video_id, duration):
1780 return (self._extract_chapters_from_json(webpage, video_id, duration)
1781 or self._extract_chapters_from_description(description, duration))
1782
1783 def _real_extract(self, url):
1784 url, smuggled_data = unsmuggle_url(url, {})
1785
1786 proto = (
1787 'http' if self._downloader.params.get('prefer_insecure', False)
1788 else 'https')
1789
1790 start_time = None
1791 end_time = None
1792 parsed_url = compat_urllib_parse_urlparse(url)
1793 for component in [parsed_url.fragment, parsed_url.query]:
1794 query = compat_parse_qs(component)
1795 if start_time is None and 't' in query:
1796 start_time = parse_duration(query['t'][0])
1797 if start_time is None and 'start' in query:
1798 start_time = parse_duration(query['start'][0])
1799 if end_time is None and 'end' in query:
1800 end_time = parse_duration(query['end'][0])
1801
1802 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1803 mobj = re.search(self._NEXT_URL_RE, url)
1804 if mobj:
1805 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1806 video_id = self.extract_id(url)
1807
1808 # Get video webpage
1809 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1810 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1811
1812 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1813 video_id = qs.get('v', [None])[0] or video_id
1814
1815 # Attempt to extract SWF player URL
1816 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1817 if mobj is not None:
1818 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1819 else:
1820 player_url = None
1821
1822 dash_mpds = []
1823
1824 def add_dash_mpd(video_info):
1825 dash_mpd = video_info.get('dashmpd')
1826 if dash_mpd and dash_mpd[0] not in dash_mpds:
1827 dash_mpds.append(dash_mpd[0])
1828
1829 def add_dash_mpd_pr(pl_response):
1830 dash_mpd = url_or_none(try_get(
1831 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1832 compat_str))
1833 if dash_mpd and dash_mpd not in dash_mpds:
1834 dash_mpds.append(dash_mpd)
1835
1836 is_live = None
1837 view_count = None
1838
1839 def extract_view_count(v_info):
1840 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1841
1842 def extract_player_response(player_response, video_id):
1843 pl_response = str_or_none(player_response)
1844 if not pl_response:
1845 return
1846 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1847 if isinstance(pl_response, dict):
1848 add_dash_mpd_pr(pl_response)
1849 return pl_response
1850
1851 def extract_embedded_config(embed_webpage, video_id):
1852 embedded_config = self._search_regex(
1853 r'setConfig\(({.*})\);',
1854 embed_webpage, 'ytInitialData', default=None)
1855 if embedded_config:
1856 return embedded_config
1857
1858 player_response = {}
1859
1860 # Get video info
1861 video_info = {}
1862 embed_webpage = None
1863 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1864 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1865 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1866 age_gate = True
1867 # We simulate the access to the video from www.youtube.com/v/{video_id}
1868 # this can be viewed without login into Youtube
1869 url = proto + '://www.youtube.com/embed/%s' % video_id
1870 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1871 ext = extract_embedded_config(embed_webpage, video_id)
1872 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1873 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1874 if not playable_in_embed:
1875 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1876 playable_in_embed = ''
1877 else:
1878 playable_in_embed = playable_in_embed.group('playableinEmbed')
1879 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1880 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1881 if playable_in_embed == 'false':
1882 '''
1883 # TODO apply this patch when Support for Python 2.6(!) and above drops
1884 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1885 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1886 '''
1887 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1888 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1889 age_gate = False
1890 # Try looking directly into the video webpage
1891 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1892 if ytplayer_config:
1893 args = ytplayer_config.get("args")
1894 if args is not None:
1895 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1896 # Convert to the same format returned by compat_parse_qs
1897 video_info = dict((k, [v]) for k, v in args.items())
1898 add_dash_mpd(video_info)
1899 # Rental video is not rented but preview is available (e.g.
1900 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1901 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1902 if not video_info and args.get('ypc_vid'):
1903 return self.url_result(
1904 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1905 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1906 is_live = True
1907 if not player_response:
1908 player_response = extract_player_response(args.get('player_response'), video_id)
1909 elif not player_response:
1910 player_response = ytplayer_config
1911 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1912 add_dash_mpd_pr(player_response)
1913 else:
1914 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1915 else:
1916 data = compat_urllib_parse_urlencode({
1917 'video_id': video_id,
1918 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1919 'sts': self._search_regex(
1920 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1921 })
1922 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1923 try:
1924 video_info_webpage = self._download_webpage(
1925 video_info_url, video_id,
1926 note='Refetching age-gated info webpage',
1927 errnote='unable to download video info webpage')
1928 except ExtractorError:
1929 video_info_webpage = None
1930 if video_info_webpage:
1931 video_info = compat_parse_qs(video_info_webpage)
1932 pl_response = video_info.get('player_response', [None])[0]
1933 player_response = extract_player_response(pl_response, video_id)
1934 add_dash_mpd(video_info)
1935 view_count = extract_view_count(video_info)
1936 else:
1937 age_gate = False
1938 # Try looking directly into the video webpage
1939 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1940 args = ytplayer_config.get("args")
1941 if args is not None:
1942 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1943 # Convert to the same format returned by compat_parse_qs
1944 video_info = dict((k, [v]) for k, v in args.items())
1945 add_dash_mpd(video_info)
1946 # Rental video is not rented but preview is available (e.g.
1947 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1948 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1949 if not video_info and args.get('ypc_vid'):
1950 return self.url_result(
1951 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1952 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1953 is_live = True
1954 if not player_response:
1955 player_response = extract_player_response(args.get('player_response'), video_id)
1956 elif not player_response:
1957 player_response = ytplayer_config
1958 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1959 add_dash_mpd_pr(player_response)
1960
1961 def extract_unavailable_message():
1962 messages = []
1963 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1964 msg = self._html_search_regex(
1965 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1966 video_webpage, 'unavailable %s' % kind, default=None)
1967 if msg:
1968 messages.append(msg)
1969 if messages:
1970 return '\n'.join(messages)
1971
1972 if not video_info and not player_response:
1973 unavailable_message = extract_unavailable_message()
1974 if not unavailable_message:
1975 unavailable_message = 'Unable to extract video data'
1976 raise ExtractorError(
1977 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1978
1979 if not isinstance(video_info, dict):
1980 video_info = {}
1981
1982 video_details = try_get(
1983 player_response, lambda x: x['videoDetails'], dict) or {}
1984
1985 microformat = try_get(
1986 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1987
1988 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1989 if not video_title:
1990 self._downloader.report_warning('Unable to extract video title')
1991 video_title = '_'
1992
1993 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1994 if video_description:
1995
1996 def replace_url(m):
1997 redir_url = compat_urlparse.urljoin(url, m.group(1))
1998 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
1999 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
2000 qs = compat_parse_qs(parsed_redir_url.query)
2001 q = qs.get('q')
2002 if q and q[0]:
2003 return q[0]
2004 return redir_url
2005
2006 description_original = video_description = re.sub(r'''(?x)
2007 <a\s+
2008 (?:[a-zA-Z-]+="[^"]*"\s+)*?
2009 (?:title|href)="([^"]+)"\s+
2010 (?:[a-zA-Z-]+="[^"]*"\s+)*?
2011 class="[^"]*"[^>]*>
2012 [^<]+\.{3}\s*
2013 </a>
2014 ''', replace_url, video_description)
2015 video_description = clean_html(video_description)
2016 else:
2017 video_description = video_details.get('shortDescription')
2018 if video_description is None:
2019 video_description = self._html_search_meta('description', video_webpage)
2020
2021 if not smuggled_data.get('force_singlefeed', False):
2022 if not self._downloader.params.get('noplaylist'):
2023 multifeed_metadata_list = try_get(
2024 player_response,
2025 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2026 compat_str) or try_get(
2027 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
2028 if multifeed_metadata_list:
2029 entries = []
2030 feed_ids = []
2031 for feed in multifeed_metadata_list.split(','):
2032 # Unquote should take place before split on comma (,) since textual
2033 # fields may contain comma as well (see
2034 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2035 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
2036
2037 def feed_entry(name):
2038 return try_get(feed_data, lambda x: x[name][0], compat_str)
2039
2040 feed_id = feed_entry('id')
2041 if not feed_id:
2042 continue
2043 feed_title = feed_entry('title')
2044 title = video_title
2045 if feed_title:
2046 title += ' (%s)' % feed_title
2047 entries.append({
2048 '_type': 'url_transparent',
2049 'ie_key': 'Youtube',
2050 'url': smuggle_url(
2051 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
2052 {'force_singlefeed': True}),
2053 'title': title,
2054 })
2055 feed_ids.append(feed_id)
2056 self.to_screen(
2057 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2058 % (', '.join(feed_ids), video_id))
2059 return self.playlist_result(entries, video_id, video_title, video_description)
2060 else:
2061 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2062
2063 if view_count is None:
2064 view_count = extract_view_count(video_info)
2065 if view_count is None and video_details:
2066 view_count = int_or_none(video_details.get('viewCount'))
2067 if view_count is None and microformat:
2068 view_count = int_or_none(microformat.get('viewCount'))
2069
2070 if is_live is None:
2071 is_live = bool_or_none(video_details.get('isLive'))
2072
2073 has_live_chat_replay = False
2074 if not is_live:
2075 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2076 try:
2077 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2078 has_live_chat_replay = True
2079 except (KeyError, IndexError, TypeError):
2080 pass
2081
2082 # Check for "rental" videos
2083 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2084 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2085
2086 def _extract_filesize(media_url):
2087 return int_or_none(self._search_regex(
2088 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2089
2090 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2091 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2092
2093 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2094 self.report_rtmp_download()
2095 formats = [{
2096 'format_id': '_rtmp',
2097 'protocol': 'rtmp',
2098 'url': video_info['conn'][0],
2099 'player_url': player_url,
2100 }]
2101 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2102 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2103 if 'rtmpe%3Dyes' in encoded_url_map:
2104 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2105 formats = []
2106 formats_spec = {}
2107 fmt_list = video_info.get('fmt_list', [''])[0]
2108 if fmt_list:
2109 for fmt in fmt_list.split(','):
2110 spec = fmt.split('/')
2111 if len(spec) > 1:
2112 width_height = spec[1].split('x')
2113 if len(width_height) == 2:
2114 formats_spec[spec[0]] = {
2115 'resolution': spec[1],
2116 'width': int_or_none(width_height[0]),
2117 'height': int_or_none(width_height[1]),
2118 }
2119 for fmt in streaming_formats:
2120 itag = str_or_none(fmt.get('itag'))
2121 if not itag:
2122 continue
2123 quality = fmt.get('quality')
2124 quality_label = fmt.get('qualityLabel') or quality
2125 formats_spec[itag] = {
2126 'asr': int_or_none(fmt.get('audioSampleRate')),
2127 'filesize': int_or_none(fmt.get('contentLength')),
2128 'format_note': quality_label,
2129 'fps': int_or_none(fmt.get('fps')),
2130 'height': int_or_none(fmt.get('height')),
2131 # bitrate for itag 43 is always 2147483647
2132 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2133 'width': int_or_none(fmt.get('width')),
2134 }
2135
2136 for fmt in streaming_formats:
2137 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2138 continue
2139 url = url_or_none(fmt.get('url'))
2140
2141 if not url:
2142 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2143 if not cipher:
2144 continue
2145 url_data = compat_parse_qs(cipher)
2146 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2147 if not url:
2148 continue
2149 else:
2150 cipher = None
2151 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2152
2153 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2154 # Unsupported FORMAT_STREAM_TYPE_OTF
2155 if stream_type == 3:
2156 continue
2157
2158 format_id = fmt.get('itag') or url_data['itag'][0]
2159 if not format_id:
2160 continue
2161 format_id = compat_str(format_id)
2162
2163 if cipher:
2164 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2165 ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
2166 jsplayer_url_json = self._search_regex(
2167 ASSETS_RE,
2168 embed_webpage if age_gate else video_webpage,
2169 'JS player URL (1)', default=None)
2170 if not jsplayer_url_json and not age_gate:
2171 # We need the embed website after all
2172 if embed_webpage is None:
2173 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2174 embed_webpage = self._download_webpage(
2175 embed_url, video_id, 'Downloading embed webpage')
2176 jsplayer_url_json = self._search_regex(
2177 ASSETS_RE, embed_webpage, 'JS player URL')
2178
2179 player_url = json.loads(jsplayer_url_json)
2180 if player_url is None:
2181 player_url_json = self._search_regex(
2182 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2183 video_webpage, 'age gate player URL')
2184 player_url = json.loads(player_url_json)
2185
2186 if 'sig' in url_data:
2187 url += '&signature=' + url_data['sig'][0]
2188 elif 's' in url_data:
2189 encrypted_sig = url_data['s'][0]
2190
2191 if self._downloader.params.get('verbose'):
2192 if player_url is None:
2193 player_desc = 'unknown'
2194 else:
2195 player_type, player_version = self._extract_player_info(player_url)
2196 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2197 parts_sizes = self._signature_cache_id(encrypted_sig)
2198 self.to_screen('{%s} signature length %s, %s' %
2199 (format_id, parts_sizes, player_desc))
2200
2201 signature = self._decrypt_signature(
2202 encrypted_sig, video_id, player_url, age_gate)
2203 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2204 url += '&%s=%s' % (sp, signature)
2205 if 'ratebypass' not in url:
2206 url += '&ratebypass=yes'
2207
2208 dct = {
2209 'format_id': format_id,
2210 'url': url,
2211 'player_url': player_url,
2212 }
2213 if format_id in self._formats:
2214 dct.update(self._formats[format_id])
2215 if format_id in formats_spec:
2216 dct.update(formats_spec[format_id])
2217
2218 # Some itags are not included in DASH manifest thus corresponding formats will
2219 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2220 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2221 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2222 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2223
2224 if width is None:
2225 width = int_or_none(fmt.get('width'))
2226 if height is None:
2227 height = int_or_none(fmt.get('height'))
2228
2229 filesize = int_or_none(url_data.get(
2230 'clen', [None])[0]) or _extract_filesize(url)
2231
2232 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2233 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2234
2235 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2236 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2237 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2238
2239 more_fields = {
2240 'filesize': filesize,
2241 'tbr': tbr,
2242 'width': width,
2243 'height': height,
2244 'fps': fps,
2245 'format_note': quality_label or quality,
2246 }
2247 for key, value in more_fields.items():
2248 if value:
2249 dct[key] = value
2250 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2251 if type_:
2252 type_split = type_.split(';')
2253 kind_ext = type_split[0].split('/')
2254 if len(kind_ext) == 2:
2255 kind, _ = kind_ext
2256 dct['ext'] = mimetype2ext(type_split[0])
2257 if kind in ('audio', 'video'):
2258 codecs = None
2259 for mobj in re.finditer(
2260 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2261 if mobj.group('key') == 'codecs':
2262 codecs = mobj.group('val')
2263 break
2264 if codecs:
2265 dct.update(parse_codecs(codecs))
2266 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2267 dct['downloader_options'] = {
2268 # Youtube throttles chunks >~10M
2269 'http_chunk_size': 10485760,
2270 }
2271 formats.append(dct)
2272 else:
2273 manifest_url = (
2274 url_or_none(try_get(
2275 player_response,
2276 lambda x: x['streamingData']['hlsManifestUrl'],
2277 compat_str))
2278 or url_or_none(try_get(
2279 video_info, lambda x: x['hlsvp'][0], compat_str)))
2280 if manifest_url:
2281 formats = []
2282 m3u8_formats = self._extract_m3u8_formats(
2283 manifest_url, video_id, 'mp4', fatal=False)
2284 for a_format in m3u8_formats:
2285 itag = self._search_regex(
2286 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2287 if itag:
2288 a_format['format_id'] = itag
2289 if itag in self._formats:
2290 dct = self._formats[itag].copy()
2291 dct.update(a_format)
2292 a_format = dct
2293 a_format['player_url'] = player_url
2294 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2295 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2296 if self._downloader.params.get('youtube_include_hls_manifest', True):
2297 formats.append(a_format)
2298 else:
2299 error_message = extract_unavailable_message()
2300 if not error_message:
2301 error_message = clean_html(try_get(
2302 player_response, lambda x: x['playabilityStatus']['reason'],
2303 compat_str))
2304 if not error_message:
2305 error_message = clean_html(
2306 try_get(video_info, lambda x: x['reason'][0], compat_str))
2307 if error_message:
2308 raise ExtractorError(error_message, expected=True)
2309 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2310
2311 # uploader
2312 video_uploader = try_get(
2313 video_info, lambda x: x['author'][0],
2314 compat_str) or str_or_none(video_details.get('author'))
2315 if video_uploader:
2316 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2317 else:
2318 self._downloader.report_warning('unable to extract uploader name')
2319
2320 # uploader_id
2321 video_uploader_id = None
2322 video_uploader_url = None
2323 mobj = re.search(
2324 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2325 video_webpage)
2326 if mobj is not None:
2327 video_uploader_id = mobj.group('uploader_id')
2328 video_uploader_url = mobj.group('uploader_url')
2329 else:
2330 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2331 if owner_profile_url:
2332 video_uploader_id = self._search_regex(
2333 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2334 default=None)
2335 video_uploader_url = owner_profile_url
2336
2337 channel_id = (
2338 str_or_none(video_details.get('channelId'))
2339 or self._html_search_meta(
2340 'channelId', video_webpage, 'channel id', default=None)
2341 or self._search_regex(
2342 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2343 video_webpage, 'channel id', default=None, group='id'))
2344 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2345
2346 thumbnails = []
2347 thumbnails_list = try_get(
2348 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2349 for t in thumbnails_list:
2350 if not isinstance(t, dict):
2351 continue
2352 thumbnail_url = url_or_none(t.get('url'))
2353 if not thumbnail_url:
2354 continue
2355 thumbnails.append({
2356 'url': thumbnail_url,
2357 'width': int_or_none(t.get('width')),
2358 'height': int_or_none(t.get('height')),
2359 })
2360
2361 if not thumbnails:
2362 video_thumbnail = None
2363 # We try first to get a high quality image:
2364 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2365 video_webpage, re.DOTALL)
2366 if m_thumb is not None:
2367 video_thumbnail = m_thumb.group(1)
2368 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2369 if thumbnail_url:
2370 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2371 if video_thumbnail:
2372 thumbnails.append({'url': video_thumbnail})
2373
2374 # upload date
2375 upload_date = self._html_search_meta(
2376 'datePublished', video_webpage, 'upload date', default=None)
2377 if not upload_date:
2378 upload_date = self._search_regex(
2379 [r'(?s)id="eow-date.*?>(.*?)</span>',
2380 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2381 video_webpage, 'upload date', default=None)
2382 if not upload_date:
2383 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2384 upload_date = unified_strdate(upload_date)
2385
2386 video_license = self._html_search_regex(
2387 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2388 video_webpage, 'license', default=None)
2389
2390 m_music = re.search(
2391 r'''(?x)
2392 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2393 <ul[^>]*>\s*
2394 <li>(?P<title>.+?)
2395 by (?P<creator>.+?)
2396 (?:
2397 \(.+?\)|
2398 <a[^>]*
2399 (?:
2400 \bhref=["\']/red[^>]*>| # drop possible
2401 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2402 )
2403 .*?
2404 )?</li
2405 ''',
2406 video_webpage)
2407 if m_music:
2408 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2409 video_creator = clean_html(m_music.group('creator'))
2410 else:
2411 video_alt_title = video_creator = None
2412
2413 def extract_meta(field):
2414 return self._html_search_regex(
2415 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2416 video_webpage, field, default=None)
2417
2418 track = extract_meta('Song')
2419 artist = extract_meta('Artist')
2420 album = extract_meta('Album')
2421
2422 # Youtube Music Auto-generated description
2423 release_date = release_year = None
2424 if video_description:
2425 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2426 if mobj:
2427 if not track:
2428 track = mobj.group('track').strip()
2429 if not artist:
2430 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2431 if not album:
2432 album = mobj.group('album'.strip())
2433 release_year = mobj.group('release_year')
2434 release_date = mobj.group('release_date')
2435 if release_date:
2436 release_date = release_date.replace('-', '')
2437 if not release_year:
2438 release_year = int(release_date[:4])
2439 if release_year:
2440 release_year = int(release_year)
2441
2442 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2443 if yt_initial:
2444 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2445 if len(music_metadata):
2446 album = music_metadata[0].get('album')
2447 artist = music_metadata[0].get('artist')
2448 track = music_metadata[0].get('track')
2449
2450 m_episode = re.search(
2451 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2452 video_webpage)
2453 if m_episode:
2454 series = unescapeHTML(m_episode.group('series'))
2455 season_number = int(m_episode.group('season'))
2456 episode_number = int(m_episode.group('episode'))
2457 else:
2458 series = season_number = episode_number = None
2459
2460 m_cat_container = self._search_regex(
2461 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2462 video_webpage, 'categories', default=None)
2463 category = None
2464 if m_cat_container:
2465 category = self._html_search_regex(
2466 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2467 default=None)
2468 if not category:
2469 category = try_get(
2470 microformat, lambda x: x['category'], compat_str)
2471 video_categories = None if category is None else [category]
2472
2473 video_tags = [
2474 unescapeHTML(m.group('content'))
2475 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2476 if not video_tags:
2477 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2478
2479 def _extract_count(count_name):
2480 return str_to_int(self._search_regex(
2481 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
2482 % re.escape(count_name),
2483 video_webpage, count_name, default=None))
2484
2485 like_count = _extract_count('like')
2486 dislike_count = _extract_count('dislike')
2487
2488 if view_count is None:
2489 view_count = str_to_int(self._search_regex(
2490 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2491 'view count', default=None))
2492
2493 average_rating = (
2494 float_or_none(video_details.get('averageRating'))
2495 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2496
2497 # subtitles
2498 video_subtitles = self.extract_subtitles(
2499 video_id, video_webpage, has_live_chat_replay)
2500 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2501
2502 video_duration = try_get(
2503 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2504 if not video_duration:
2505 video_duration = int_or_none(video_details.get('lengthSeconds'))
2506 if not video_duration:
2507 video_duration = parse_duration(self._html_search_meta(
2508 'duration', video_webpage, 'video duration'))
2509
2510 # Get Subscriber Count of channel
2511 subscriber_count = parse_count(self._search_regex(
2512 r'"text":"([\d\.]+\w?) subscribers"',
2513 video_webpage,
2514 'subscriber count',
2515 default=None
2516 ))
2517
2518 # annotations
2519 video_annotations = None
2520 if self._downloader.params.get('writeannotations', False):
2521 xsrf_token = self._search_regex(
2522 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2523 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2524 invideo_url = try_get(
2525 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2526 if xsrf_token and invideo_url:
2527 xsrf_field_name = self._search_regex(
2528 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2529 video_webpage, 'xsrf field name',
2530 group='xsrf_field_name', default='session_token')
2531 video_annotations = self._download_webpage(
2532 self._proto_relative_url(invideo_url),
2533 video_id, note='Downloading annotations',
2534 errnote='Unable to download video annotations', fatal=False,
2535 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2536
2537 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2538
2539 # Look for the DASH manifest
2540 if self._downloader.params.get('youtube_include_dash_manifest', True):
2541 dash_mpd_fatal = True
2542 for mpd_url in dash_mpds:
2543 dash_formats = {}
2544 try:
2545 def decrypt_sig(mobj):
2546 s = mobj.group(1)
2547 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2548 return '/signature/%s' % dec_s
2549
2550 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2551
2552 for df in self._extract_mpd_formats(
2553 mpd_url, video_id, fatal=dash_mpd_fatal,
2554 formats_dict=self._formats):
2555 if not df.get('filesize'):
2556 df['filesize'] = _extract_filesize(df['url'])
2557 # Do not overwrite DASH format found in some previous DASH manifest
2558 if df['format_id'] not in dash_formats:
2559 dash_formats[df['format_id']] = df
2560 # Additional DASH manifests may end up in HTTP Error 403 therefore
2561 # allow them to fail without bug report message if we already have
2562 # some DASH manifest succeeded. This is temporary workaround to reduce
2563 # burst of bug reports until we figure out the reason and whether it
2564 # can be fixed at all.
2565 dash_mpd_fatal = False
2566 except (ExtractorError, KeyError) as e:
2567 self.report_warning(
2568 'Skipping DASH manifest: %r' % e, video_id)
2569 if dash_formats:
2570 # Remove the formats we found through non-DASH, they
2571 # contain less info and it can be wrong, because we use
2572 # fixed values (for example the resolution). See
2573 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2574 # example.
2575 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2576 formats.extend(dash_formats.values())
2577
2578 # Check for malformed aspect ratio
2579 stretched_m = re.search(
2580 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2581 video_webpage)
2582 if stretched_m:
2583 w = float(stretched_m.group('w'))
2584 h = float(stretched_m.group('h'))
2585 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2586 # We will only process correct ratios.
2587 if w > 0 and h > 0:
2588 ratio = w / h
2589 for f in formats:
2590 if f.get('vcodec') != 'none':
2591 f['stretched_ratio'] = ratio
2592
2593 if not formats:
2594 if 'reason' in video_info:
2595 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2596 regions_allowed = self._html_search_meta(
2597 'regionsAllowed', video_webpage, default=None)
2598 countries = regions_allowed.split(',') if regions_allowed else None
2599 self.raise_geo_restricted(
2600 msg=video_info['reason'][0], countries=countries)
2601 reason = video_info['reason'][0]
2602 if 'Invalid parameters' in reason:
2603 unavailable_message = extract_unavailable_message()
2604 if unavailable_message:
2605 reason = unavailable_message
2606 raise ExtractorError(
2607 'YouTube said: %s' % reason,
2608 expected=True, video_id=video_id)
2609 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2610 raise ExtractorError('This video is DRM protected.', expected=True)
2611
2612 self._sort_formats(formats)
2613
2614 self.mark_watched(video_id, video_info, player_response)
2615
2616 return {
2617 'id': video_id,
2618 'uploader': video_uploader,
2619 'uploader_id': video_uploader_id,
2620 'uploader_url': video_uploader_url,
2621 'channel_id': channel_id,
2622 'channel_url': channel_url,
2623 'upload_date': upload_date,
2624 'license': video_license,
2625 'creator': video_creator or artist,
2626 'title': video_title,
2627 'alt_title': video_alt_title or track,
2628 'thumbnails': thumbnails,
2629 'description': video_description,
2630 'categories': video_categories,
2631 'tags': video_tags,
2632 'subtitles': video_subtitles,
2633 'automatic_captions': automatic_captions,
2634 'duration': video_duration,
2635 'age_limit': 18 if age_gate else 0,
2636 'annotations': video_annotations,
2637 'chapters': chapters,
2638 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2639 'view_count': view_count,
2640 'like_count': like_count,
2641 'dislike_count': dislike_count,
2642 'average_rating': average_rating,
2643 'formats': formats,
2644 'is_live': is_live,
2645 'start_time': start_time,
2646 'end_time': end_time,
2647 'series': series,
2648 'season_number': season_number,
2649 'episode_number': episode_number,
2650 'track': track,
2651 'artist': artist,
2652 'album': album,
2653 'release_date': release_date,
2654 'release_year': release_year,
2655 'subscriber_count': subscriber_count,
2656 }
2657
2658
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube playlists, including mixes and YouTube Music albums."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    # YouTube Music album playlists share this id prefix and are extracted
    # like regular playlists (see _real_extract), but get fixed uploader info.
    _YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
    _YTM_CHANNEL_INFO = {
        'uploader': 'Youtube Music',
        'uploader_id': 'music',  # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
        'uploader_url': 'https://www.youtube.com/music'
    }
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        self._login()

    def extract_videos_from_page(self, page):
        """Return an iterator of (video_id, title) pairs scraped from *page*.

        Tries the modern data-video-id markup first, then falls back to
        progressively more relaxed href-based regexes.
        """
        ids_in_page = []
        titles_in_page = []

        for item in re.findall(
                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
            attrs = extract_attributes(item)
            video_id = attrs['data-video-id']
            video_title = unescapeHTML(attrs.get('data-title'))
            if video_title:
                video_title = video_title.strip()
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)

        # Fallback with old _VIDEO_RE
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)

        # Relaxed fallbacks
        self.extract_videos_from_page_impl(
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)
        self.extract_videos_from_page_impl(
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix_ids_from_yt_initial(self, yt_initial):
        """Extract video ids from the embedded ytInitialData of a mix page."""
        ids = []
        playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
        if playlist_contents:
            for item in playlist_contents:
                videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
                if videoId:
                    ids.append(videoId)
        return ids

    def _extract_mix(self, playlist_id):
        """Extract an auto-generated mix playlist.

        The mixes are generated from a single video; the id of the playlist
        is just 'RD' + video_id. Pages are fetched repeatedly, each seeded
        with the last id seen, until no new video ids appear.
        """
        ids = []
        yt_initial = None
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))

            # if no ids in html of page, try using embedded json
            # (idiomatic emptiness test instead of len(...) == 0)
            if not new_ids:
                yt_initial = self._get_yt_initial_data(playlist_id, webpage)
                if yt_initial:
                    new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)

            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # PEP 8: plain def instead of a lambda bound to a name
        def search_title(class_name):
            return get_element_by_attribute('class', class_name, webpage)

        title_span = (
            search_title('playlist-title')
            or search_title('title long-title')
            or search_title('title'))
        title = clean_html(title_span)

        if not title:
            title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist.

        Returns (has_videos, playlist_result); has_videos is False when the
        playlist page yields no entries (used by _real_extract to fall back
        to plain video extraction).
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })
        if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
            playlist.update(self._YTM_CHANNEL_INFO)

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Check if *url* is video-specific; honor --no-playlist.

        Returns (video_id, result): result is a url_result when only the
        single video should be downloaded, else None.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
        return video_id, None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
                # Mixes require a custom extraction process,
                # Youtube Music playlists act like normal playlists (with randomized order)
                return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
3051
3052
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    """Extractor for YouTube channel video listings (/channel/<id>)."""
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the more specific playlists/live extractors when they match.
        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
                else super(YoutubeChannelIE, cls).suitable(url))

    def _build_template_url(self, url, channel_id):
        # Hook overridden by YoutubeUserIE (which also uses the original URL);
        # here only the channel id is needed.
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        """Extract a channel's videos, preferring redirection to the channel's
        uploads playlist; falls back to page-by-page scraping."""
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # 'UC...' channel id maps to the 'UU...' uploads playlist id
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            # Probe for at least one entry; an empty channel page may instead
            # carry an alert (e.g. geo restriction) worth surfacing.
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3152
3153
class YoutubeUserIE(YoutubeChannelIE):
    """Extractor for user/custom channel URLs (/user/<id>, /c/<id>, ytuser:)."""
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Don't return True if the url can be extracted with another youtube
        # extractor: this regex is too permissive and would match their URLs.
        # Scans module globals for every other Youtube*IE class.
        other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in other_yt_ies):
            return False
        else:
            return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        # Preserve the original path kind ('user' or 'c'); default to 'user'
        # for bare youtube.com/<id> and ytuser: forms.
        mobj = re.match(self._VALID_URL, url)
        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
3211
3212
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    """Extractor for channel /live URLs, resolving to the current live video."""
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand off to YoutubeIE when the /live page points at a video;
        otherwise fall back to the plain channel/user URL."""
        url_match = re.match(self._VALID_URL, url)
        channel_id = url_match.group('id')
        base_url = url_match.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            # Page could not be fetched; let the channel extractors handle it.
            return self.url_result(base_url)
        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        looks_like_video = (
            page_type.startswith('video')
            and video_id
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if looks_like_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3263
3264
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    """Extractor for the /playlists tab of a user or channel.

    All extraction logic lives in the base class; this class only supplies
    the URL pattern, identifiers, and test cases.
    """
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
3297
3298
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistBaseInfoExtractor):
    """Search extractor backed by the Innertube /youtubei/v1/search endpoint."""
    IE_DESC = 'YouTube.com searches'
    # There does not appear to be a real upper bound: searching for 'python',
    # for instance, reports more than 8,000,000 results.
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* url_transparent results for *query*, paginating
        via Innertube continuation tokens."""
        request_body = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            request_body['params'] = self._SEARCH_PARAMS
        emitted = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(request_body).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # First page nests results differently from continuation pages.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for entry in isr_contents:
                video = entry.get('videoRenderer') if isinstance(entry, dict) else None
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Strip whitespace/thousands separators before parsing digits.
                normalized_views = re.sub(r'\s', '', view_count_text)
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', normalized_views, 'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                emitted += 1
                if emitted == n:
                    return
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            # Mutate the request body so the next page resumes where this left off.
            request_body['continuation'] = token

    def _get_n_results(self, query, n):
        """Return a playlist result holding up to *n* search hits for *query*."""
        return self.playlist_result(self._entries(query, n), query)
3387
3388
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search extractor, but asks the API to order results newest-first.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    _SEARCH_PARAMS = 'CAI%3D'  # "params" blob selecting sort-by-upload-date
3394
3395
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _process_json_dict(self, obj, videos, c):
        """Collect video objects and the continuation blob while walking JSON."""
        if "videoId" in obj:
            videos.append(obj)
        elif "nextContinuationData" in obj:
            c["continuation"] = obj["nextContinuationData"]

    def _real_extract(self, url):
        """Extract a search-results page as a playlist (first 5 pages)."""
        match = re.match(self._VALID_URL, url)
        query = compat_urllib_parse_unquote_plus(match.group('query'))
        webpage = self._download_webpage(url, query)
        entries = self._entries(webpage, query, max_pages=5)
        return self.playlist_result(entries, playlist_title=query)
3425
3426
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        """Delegate to the playlists extractor on the show's /playlists page."""
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3444
3445
class YoutubeFeedsInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are account-specific, so log in before extracting.
        self._login()

    def _process_entries(self, entries, seen):
        """Yield url results for entries not already present in *seen*.

        *seen* is a caller-owned list of video dicts that accumulates across
        pages; new entries are appended to it so later calls de-duplicate
        against everything yielded so far.
        """
        # Build the id set once instead of linearly rescanning the ever-growing
        # *seen* list for every entry (previously O(len(entries) * len(seen))).
        # try_get also tolerates malformed entries without a 'videoId' key,
        # which previously raised KeyError.
        seen_ids = set()
        for old in seen:
            old_id = try_get(old, lambda x: x['videoId'])
            if old_id:
                seen_ids.add(old_id)

        new_info = []
        for v in entries:
            v_id = try_get(v, lambda x: x['videoId'])
            if not v_id or v_id in seen_ids:
                continue
            # Adding to the set here also de-duplicates repeats within the
            # current batch, not only against previous pages.
            seen_ids.add(v_id)
            new_info.append(v)

        if not new_info:
            return

        seen.extend(new_info)
        for video in new_info:
            yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=self._extract_title(video))

    def _real_extract(self, url):
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(self._entries(page, self._PLAYLIST_TITLE),
                                    playlist_title=self._PLAYLIST_TITLE)
3489
3490
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the single video when requested, otherwise the WL playlist."""
        _, video = self._check_download_just_video(url, 'WL')
        if video:
            return video
        return self._extract_playlist('WL')[1]
3510
3511
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites page to its backing playlist id."""
        webpage = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', webpage, 'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')
3522
3523
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Personalised "recommended" feed; depends on the logged-in session.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3529
3530
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Subscriptions feed of the logged-in account.
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3536
3537
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Watch-history feed of the logged-in account.
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
3543
3544
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution URLs whose v= parameter was lost, typically
    # because an unquoted '&' truncated the command line in the user's shell.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint that the URL was truncated by the shell."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
3592
3593
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than the full 11 characters.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail: the id in *url* is too short to be a real video id."""
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)