]> jfr.im git - yt-dlp.git/blame_incremental - youtube_dlc/extractor/youtube.py
Merge branch 'master' into youtube-playlist-polymer
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
... / ...
CommitLineData
1# coding: utf-8
2
3from __future__ import unicode_literals
4
5
6import itertools
7import json
8import os.path
9import random
10import re
11import time
12import traceback
13
14from .common import InfoExtractor, SearchInfoExtractor
15from ..jsinterp import JSInterpreter
16from ..swfinterp import SWFInterpreter
17from ..compat import (
18 compat_chr,
19 compat_HTTPError,
20 compat_kwargs,
21 compat_parse_qs,
22 compat_urllib_parse_unquote,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 compat_str,
28)
29from ..utils import (
30 bool_or_none,
31 clean_html,
32 error_to_compat_str,
33 extract_attributes,
34 ExtractorError,
35 float_or_none,
36 get_element_by_attribute,
37 get_element_by_id,
38 int_or_none,
39 js_to_json,
40 mimetype2ext,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 remove_quotes,
46 remove_start,
47 smuggle_url,
48 str_or_none,
49 str_to_int,
50 try_get,
51 unescapeHTML,
52 unified_strdate,
53 unsmuggle_url,
54 uppercase_escape,
55 url_or_none,
56 urlencode_postdata,
57)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in endpoints used by _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled with the 'TL' token extracted from the challenge response.
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches the IDs of the various playlist flavours (PL..., OLAK5uy_..., etc.).
    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
    # Captures the JSON object assigned to ytInitialData in a page.
    _INITIAL_DATA_RE = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
    # Captures the JSON argument of each ytcfg.set(...) call.
    _YTCFG_DATA_RE = r"ytcfg.set\(({.*?})\)"

    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '1.20200609.04.02',
    }

    def _set_language(self):
        """Force English UI/results by setting the PREF cookie on .youtube.com."""
        self._set_cookie(
            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
            # YouTube sets the expire time to about two months
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Turn a list of video IDs into url_result dicts for the Youtube IE."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            # POST the hidden form fields plus the sign-in payload f_req and
            # return the parsed JSON response (False on download failure).
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Strip everything before the first '[' so the body parses as JSON.
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Payload layout reverse-engineered from the Google web sign-in flow;
        # positions are significant and must not be reordered.
        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                # Users often paste codes with the 'G-' prefix from SMS; drop it.
                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                # Other account challenges cannot be solved automatically;
                # point the user at a browser.
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        # A successful login redirects through a page referencing myaccount.
        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        # Copy the query dict so the caller's dict is never mutated downstream.
        query = kwargs.get('query', {}).copy()
        kwargs['query'] = query
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _get_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON object from *webpage*.

        Returns the parsed dict, or None if it cannot be found or parsed.
        """
        config = self._search_regex(
            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
            webpage, 'ytInitialData', default=None)
        if config:
            return self._parse_json(
                uppercase_escape(config), video_id, fatal=False)

    def _real_initialize(self):
        # Without a downloader there is nothing to configure.
        if self._downloader is None:
            return
        self._set_language()
        if not self._login():
            return
299
300
class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
    """Shared JSON-walking and pagination helpers for YouTube listing pages."""

    def _find_entries_in_json(self, extracted):
        """Recursively walk the parsed ytInitialData tree.

        Collects every dict for which self._is_entry() (defined by
        subclasses) is true, and separately the first dict carrying a
        'continuationCommand' key (used to request the next page).

        Returns a (entries, continuation_or_None) tuple.
        """
        entries = []
        c = {}

        def _real_find(obj):
            # Strings are leaves; None carries nothing.
            if obj is None or isinstance(obj, str):
                return

            if type(obj) is list:
                for elem in obj:
                    _real_find(elem)

            if type(obj) is dict:
                # An entry dict is collected whole; do not descend into it.
                if self._is_entry(obj):
                    entries.append(obj)
                    return

                if 'continuationCommand' in obj:
                    c['continuation'] = obj
                    return

                for _, o in obj.items():
                    _real_find(o)

        _real_find(extracted)

        return entries, try_get(c, lambda x: x["continuation"])

    def _entries(self, page, playlist_id, n=1):
        """Yield processed entries from *page*, following continuations
        via the innertube browse API for up to *n* pages.

        page -- HTML of the first listing page
        playlist_id -- id used for logging/error reporting
        n -- maximum number of pages to process
        """
        seen = []

        # Collect ytcfg.set(...) values (API key, innertube context) needed
        # to issue continuation requests.
        yt_conf = {}
        for m in re.finditer(self._YTCFG_DATA_RE, page):
            parsed = self._parse_json(m.group(1), playlist_id,
                                      transform_source=js_to_json, fatal=False)
            if parsed:
                yt_conf.update(parsed)

        data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)

        # for page_num in itertools.count(1):
        for page_num in range(n):
            entries, continuation = self._find_entries_in_json(data_json)
            processed = self._process_entries(entries, seen)

            if not processed:
                break
            for entry in processed:
                yield entry

            # Without continuation data or ytcfg values we cannot page further.
            if not continuation or not yt_conf:
                break
            continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
            continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
            if not continuation_token or not continuation_url:
                break

            count = 0
            retries = 3
            while count <= retries:
                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    data_json = self._download_json(
                        'https://www.youtube.com%s' % continuation_url,
                        playlist_id,
                        'Downloading page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),

                        transform_source=uppercase_escape,
                        query={
                            'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
                        },
                        data=bytes(json.dumps({
                            'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
                            'continuation': continuation_token
                        }), encoding='utf-8'),
                        headers={
                            'Content-Type': 'application/json'
                        }
                    )
                    break
                except ExtractorError as e:
                    # Only retry on 500/503; anything else (or exhausted
                    # retries) is re-raised.
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise

    def _extract_title(self, renderer):
        """Return the renderer's title from either the 'runs' or the
        'simpleText' representation, or None if neither is present."""
        title = try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        if title:
            return title
        return try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
396
397
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Video-list pages: an entry is any renderer dict carrying a videoId."""

    def _is_entry(self, obj):
        # A dict counts as a video entry as soon as it has a 'videoId' key.
        return 'videoId' in obj

    def _process_entries(self, entries, seen):
        """Yield a url_result per unique video, in first-seen order,
        back-filling an empty title if a later duplicate supplies one."""
        found_ids = []
        found_titles = []
        for item in entries:
            vid = try_get(item, lambda x: x['videoId'])
            title = self._extract_title(item)

            # Not a proper videoRenderer, or title extraction broke: skip it.
            if vid is None or title is None:
                continue

            title = title.strip()

            if vid in found_ids:
                pos = found_ids.index(vid)
                # Keep the first ID but prefer a non-empty title.
                if title and not found_titles[pos]:
                    found_titles[pos] = title
            else:
                found_ids.append(vid)
                found_titles.append(title)

        for vid, title in zip(found_ids, found_titles):
            yield self.url_result(vid, 'Youtube', vid, title)
425
426
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
    """Playlists-of-a-channel pages: an entry carries a playlistId."""

    def _is_entry(self, obj):
        # A dict counts as a playlist entry as soon as it has a 'playlistId' key.
        return 'playlistId' in obj

    def _process_entries(self, entries, seen):
        """Yield one playlist url_result per unique playlistId, first-seen order."""
        unique_ids = orderedSet(
            try_get(renderer, lambda x: x['playlistId']) for renderer in entries)
        for pl_id in unique_ids:
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % pl_id
            yield self.url_result(playlist_url, 'YoutubePlaylist')

    def _real_extract(self, url):
        """Download the listing page and wrap its entries in a playlist_result."""
        pl_id = self._match_id(url)
        page = self._download_webpage(url, pl_id)
        page_title = self._og_search_title(page, fatal=False)
        return self.playlist_result(
            self._entries(page, pl_id), pl_id, page_title)
442
443
444class YoutubeIE(YoutubeBaseInfoExtractor):
445 IE_DESC = 'YouTube.com'
446 _VALID_URL = r"""(?x)^
447 (
448 (?:https?://|//) # http(s):// or protocol-independent URL
449 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
450 (?:www\.)?deturl\.com/www\.youtube\.com/|
451 (?:www\.)?pwnyoutube\.com/|
452 (?:www\.)?hooktube\.com/|
453 (?:www\.)?yourepeat\.com/|
454 tube\.majestyc\.net/|
455 # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
456 (?:(?:www|dev)\.)?invidio\.us/|
457 (?:(?:www|no)\.)?invidiou\.sh/|
458 (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
459 (?:www\.)?invidious\.kabi\.tk/|
460 (?:www\.)?invidious\.13ad\.de/|
461 (?:www\.)?invidious\.mastodon\.host/|
462 (?:www\.)?invidious\.nixnet\.xyz/|
463 (?:www\.)?invidious\.drycat\.fr/|
464 (?:www\.)?tube\.poal\.co/|
465 (?:www\.)?vid\.wxzm\.sx/|
466 (?:www\.)?yewtu\.be/|
467 (?:www\.)?yt\.elukerio\.org/|
468 (?:www\.)?yt\.lelux\.fi/|
469 (?:www\.)?invidious\.ggc-project\.de/|
470 (?:www\.)?yt\.maisputain\.ovh/|
471 (?:www\.)?invidious\.13ad\.de/|
472 (?:www\.)?invidious\.toot\.koeln/|
473 (?:www\.)?invidious\.fdn\.fr/|
474 (?:www\.)?watch\.nettohikari\.com/|
475 (?:www\.)?kgg2m7yk5aybusll\.onion/|
476 (?:www\.)?qklhadlycap4cnod\.onion/|
477 (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
478 (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
479 (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
480 (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
481 (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
482 (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
483 youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
484 (?:.*?\#/)? # handle anchor (#/) redirect urls
485 (?: # the various things that can precede the ID:
486 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
487 |(?: # or the v= param in all its forms
488 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
489 (?:\?|\#!?) # the params delimiter ? or # or #!
490 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
491 v=
492 )
493 ))
494 |(?:
495 youtu\.be| # just youtu.be/xxxx
496 vid\.plus| # or vid.plus/xxxx
497 zwearz\.com/watch| # or zwearz.com/watch/xxxx
498 )/
499 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
500 )
501 )? # all until now is optional -> you can pass the naked ID
502 ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
503 (?!.*?\blist=
504 (?:
505 %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
506 WL # WL are handled by the watch later IE
507 )
508 )
509 (?(1).+)? # if we found the ID, everything can follow
510 $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
511 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
512 _PLAYER_INFO_RE = (
513 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
514 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
515 )
516 _formats = {
517 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
518 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
519 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
520 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
521 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
522 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
523 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
524 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
525 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
526 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
527 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
528 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
529 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
530 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
531 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
532 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
533 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
534 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
535
536
537 # 3D videos
538 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
539 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
540 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
541 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
542 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
543 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
544 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
545
546 # Apple HTTP Live Streaming
547 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
548 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
549 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
550 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
551 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
552 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
553 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
554 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
555
556 # DASH mp4 video
557 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
558 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
559 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
560 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
561 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
562 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
563 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
564 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
565 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
566 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
567 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
568 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
569
570 # Dash mp4 audio
571 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
572 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
573 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
574 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
575 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
576 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
577 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
578
579 # Dash webm
580 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
581 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
582 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
583 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
584 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
585 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
586 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
587 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
588 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
589 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
590 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
591 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
592 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
593 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
594 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
595 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
596 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
597 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
598 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
599 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
600 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
601 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
602
603 # Dash webm audio
604 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
605 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
606
607 # Dash webm audio with opus inside
608 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
609 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
610 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
611
612 # RTMP (unnamed)
613 '_rtmp': {'protocol': 'rtmp'},
614
615 # av01 video only formats sometimes served with "unknown" codecs
616 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
617 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
618 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
619 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
620 }
621 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
622
623 _GEO_BYPASS = False
624
625 IE_NAME = 'youtube'
626 _TESTS = [
627 {
628 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
629 'info_dict': {
630 'id': 'BaW_jenozKc',
631 'ext': 'mp4',
632 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
633 'uploader': 'Philipp Hagemeister',
634 'uploader_id': 'phihag',
635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
636 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
637 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
638 'upload_date': '20121002',
639 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
640 'categories': ['Science & Technology'],
641 'tags': ['youtube-dl'],
642 'duration': 10,
643 'view_count': int,
644 'like_count': int,
645 'dislike_count': int,
646 'start_time': 1,
647 'end_time': 9,
648 }
649 },
650 {
651 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
652 'note': 'Embed-only video (#1746)',
653 'info_dict': {
654 'id': 'yZIXLfi8CZQ',
655 'ext': 'mp4',
656 'upload_date': '20120608',
657 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
658 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
659 'uploader': 'SET India',
660 'uploader_id': 'setindia',
661 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
662 'age_limit': 18,
663 }
664 },
665 {
666 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
667 'note': 'Use the first video ID in the URL',
668 'info_dict': {
669 'id': 'BaW_jenozKc',
670 'ext': 'mp4',
671 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
672 'uploader': 'Philipp Hagemeister',
673 'uploader_id': 'phihag',
674 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
675 'upload_date': '20121002',
676 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
677 'categories': ['Science & Technology'],
678 'tags': ['youtube-dl'],
679 'duration': 10,
680 'view_count': int,
681 'like_count': int,
682 'dislike_count': int,
683 },
684 'params': {
685 'skip_download': True,
686 },
687 },
688 {
689 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
690 'note': '256k DASH audio (format 141) via DASH manifest',
691 'info_dict': {
692 'id': 'a9LDPn-MO4I',
693 'ext': 'm4a',
694 'upload_date': '20121002',
695 'uploader_id': '8KVIDEO',
696 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
697 'description': '',
698 'uploader': '8KVIDEO',
699 'title': 'UHDTV TEST 8K VIDEO.mp4'
700 },
701 'params': {
702 'youtube_include_dash_manifest': True,
703 'format': '141',
704 },
705 'skip': 'format 141 not served anymore',
706 },
707 # Controversy video
708 {
709 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
710 'info_dict': {
711 'id': 'T4XJQO3qol8',
712 'ext': 'mp4',
713 'duration': 219,
714 'upload_date': '20100909',
715 'uploader': 'Amazing Atheist',
716 'uploader_id': 'TheAmazingAtheist',
717 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
718 'title': 'Burning Everyone\'s Koran',
719 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
720 }
721 },
722 # Normal age-gate video (embed allowed)
723 {
724 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
725 'info_dict': {
726 'id': 'HtVdAasjOgU',
727 'ext': 'mp4',
728 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
729 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
730 'duration': 142,
731 'uploader': 'The Witcher',
732 'uploader_id': 'WitcherGame',
733 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
734 'upload_date': '20140605',
735 'age_limit': 18,
736 },
737 },
738 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
739 {
740 'url': 'lqQg6PlCWgI',
741 'info_dict': {
742 'id': 'lqQg6PlCWgI',
743 'ext': 'mp4',
744 'duration': 6085,
745 'upload_date': '20150827',
746 'uploader_id': 'olympic',
747 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
748 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
749 'uploader': 'Olympic',
750 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
751 },
752 'params': {
753 'skip_download': 'requires avconv',
754 }
755 },
756 # Non-square pixels
757 {
758 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
759 'info_dict': {
760 'id': '_b-2C3KPAM0',
761 'ext': 'mp4',
762 'stretched_ratio': 16 / 9.,
763 'duration': 85,
764 'upload_date': '20110310',
765 'uploader_id': 'AllenMeow',
766 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
767 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
768 'uploader': '孫ᄋᄅ',
769 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
770 },
771 },
772 # url_encoded_fmt_stream_map is empty string
773 {
774 'url': 'qEJwOuvDf7I',
775 'info_dict': {
776 'id': 'qEJwOuvDf7I',
777 'ext': 'webm',
778 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
779 'description': '',
780 'upload_date': '20150404',
781 'uploader_id': 'spbelect',
782 'uploader': 'Наблюдатели Петербурга',
783 },
784 'params': {
785 'skip_download': 'requires avconv',
786 },
787 'skip': 'This live event has ended.',
788 },
789 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
790 {
791 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
792 'info_dict': {
793 'id': 'FIl7x6_3R5Y',
794 'ext': 'webm',
795 'title': 'md5:7b81415841e02ecd4313668cde88737a',
796 'description': 'md5:116377fd2963b81ec4ce64b542173306',
797 'duration': 220,
798 'upload_date': '20150625',
799 'uploader_id': 'dorappi2000',
800 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
801 'uploader': 'dorappi2000',
802 'formats': 'mincount:31',
803 },
804 'skip': 'not actual anymore',
805 },
806 # DASH manifest with segment_list
807 {
808 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
809 'md5': '8ce563a1d667b599d21064e982ab9e31',
810 'info_dict': {
811 'id': 'CsmdDsKjzN8',
812 'ext': 'mp4',
813 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
814 'uploader': 'Airtek',
815 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
816 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
817 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
818 },
819 'params': {
820 'youtube_include_dash_manifest': True,
821 'format': '135', # bestvideo
822 },
823 'skip': 'This live event has ended.',
824 },
825 {
826 # Multifeed videos (multiple cameras), URL is for Main Camera
827 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
828 'info_dict': {
829 'id': 'jqWvoWXjCVs',
830 'title': 'teamPGP: Rocket League Noob Stream',
831 'description': 'md5:dc7872fb300e143831327f1bae3af010',
832 },
833 'playlist': [{
834 'info_dict': {
835 'id': 'jqWvoWXjCVs',
836 'ext': 'mp4',
837 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
838 'description': 'md5:dc7872fb300e143831327f1bae3af010',
839 'duration': 7335,
840 'upload_date': '20150721',
841 'uploader': 'Beer Games Beer',
842 'uploader_id': 'beergamesbeer',
843 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
844 'license': 'Standard YouTube License',
845 },
846 }, {
847 'info_dict': {
848 'id': '6h8e8xoXJzg',
849 'ext': 'mp4',
850 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
851 'description': 'md5:dc7872fb300e143831327f1bae3af010',
852 'duration': 7337,
853 'upload_date': '20150721',
854 'uploader': 'Beer Games Beer',
855 'uploader_id': 'beergamesbeer',
856 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
857 'license': 'Standard YouTube License',
858 },
859 }, {
860 'info_dict': {
861 'id': 'PUOgX5z9xZw',
862 'ext': 'mp4',
863 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
864 'description': 'md5:dc7872fb300e143831327f1bae3af010',
865 'duration': 7337,
866 'upload_date': '20150721',
867 'uploader': 'Beer Games Beer',
868 'uploader_id': 'beergamesbeer',
869 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
870 'license': 'Standard YouTube License',
871 },
872 }, {
873 'info_dict': {
874 'id': 'teuwxikvS5k',
875 'ext': 'mp4',
876 'title': 'teamPGP: Rocket League Noob Stream (zim)',
877 'description': 'md5:dc7872fb300e143831327f1bae3af010',
878 'duration': 7334,
879 'upload_date': '20150721',
880 'uploader': 'Beer Games Beer',
881 'uploader_id': 'beergamesbeer',
882 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
883 'license': 'Standard YouTube License',
884 },
885 }],
886 'params': {
887 'skip_download': True,
888 },
889 'skip': 'This video is not available.',
890 },
891 {
892 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
893 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
894 'info_dict': {
895 'id': 'gVfLd0zydlo',
896 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
897 },
898 'playlist_count': 2,
899 'skip': 'Not multifeed anymore',
900 },
901 {
902 'url': 'https://vid.plus/FlRa-iH7PGw',
903 'only_matching': True,
904 },
905 {
906 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
907 'only_matching': True,
908 },
909 {
910 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
911 # Also tests cut-off URL expansion in video description (see
912 # https://github.com/ytdl-org/youtube-dl/issues/1892,
913 # https://github.com/ytdl-org/youtube-dl/issues/8164)
914 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
915 'info_dict': {
916 'id': 'lsguqyKfVQg',
917 'ext': 'mp4',
918 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
919 'alt_title': 'Dark Walk - Position Music',
920 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
921 'duration': 133,
922 'upload_date': '20151119',
923 'uploader_id': 'IronSoulElf',
924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
925 'uploader': 'IronSoulElf',
926 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
927 'track': 'Dark Walk - Position Music',
928 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
929 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
930 },
931 'params': {
932 'skip_download': True,
933 },
934 },
935 {
936 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
937 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
938 'only_matching': True,
939 },
940 {
941 # Video with yt:stretch=17:0
942 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
943 'info_dict': {
944 'id': 'Q39EVAstoRM',
945 'ext': 'mp4',
946 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
947 'description': 'md5:ee18a25c350637c8faff806845bddee9',
948 'upload_date': '20151107',
949 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
950 'uploader': 'CH GAMER DROID',
951 },
952 'params': {
953 'skip_download': True,
954 },
955 'skip': 'This video does not exist.',
956 },
957 {
958 # Video licensed under Creative Commons
959 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
960 'info_dict': {
961 'id': 'M4gD1WSo5mA',
962 'ext': 'mp4',
963 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
964 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
965 'duration': 721,
966 'upload_date': '20150127',
967 'uploader_id': 'BerkmanCenter',
968 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
969 'uploader': 'The Berkman Klein Center for Internet & Society',
970 'license': 'Creative Commons Attribution license (reuse allowed)',
971 },
972 'params': {
973 'skip_download': True,
974 },
975 },
976 {
977 # Channel-like uploader_url
978 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
979 'info_dict': {
980 'id': 'eQcmzGIKrzg',
981 'ext': 'mp4',
982 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
983 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
984 'duration': 4060,
985 'upload_date': '20151119',
986 'uploader': 'Bernie Sanders',
987 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
988 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
989 'license': 'Creative Commons Attribution license (reuse allowed)',
990 },
991 'params': {
992 'skip_download': True,
993 },
994 },
995 {
996 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
997 'only_matching': True,
998 },
999 {
1000 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1001 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1002 'only_matching': True,
1003 },
1004 {
1005 # Rental video preview
1006 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1007 'info_dict': {
1008 'id': 'uGpuVWrhIzE',
1009 'ext': 'mp4',
1010 'title': 'Piku - Trailer',
1011 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1012 'upload_date': '20150811',
1013 'uploader': 'FlixMatrix',
1014 'uploader_id': 'FlixMatrixKaravan',
1015 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1016 'license': 'Standard YouTube License',
1017 },
1018 'params': {
1019 'skip_download': True,
1020 },
1021 'skip': 'This video is not available.',
1022 },
1023 {
1024 # YouTube Red video with episode data
1025 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1026 'info_dict': {
1027 'id': 'iqKdEhx-dD4',
1028 'ext': 'mp4',
1029 'title': 'Isolation - Mind Field (Ep 1)',
1030 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
1031 'duration': 2085,
1032 'upload_date': '20170118',
1033 'uploader': 'Vsauce',
1034 'uploader_id': 'Vsauce',
1035 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1036 'series': 'Mind Field',
1037 'season_number': 1,
1038 'episode_number': 1,
1039 },
1040 'params': {
1041 'skip_download': True,
1042 },
1043 'expected_warnings': [
1044 'Skipping DASH manifest',
1045 ],
1046 },
1047 {
1048 # The following content has been identified by the YouTube community
1049 # as inappropriate or offensive to some audiences.
1050 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1051 'info_dict': {
1052 'id': '6SJNVb0GnPI',
1053 'ext': 'mp4',
1054 'title': 'Race Differences in Intelligence',
1055 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1056 'duration': 965,
1057 'upload_date': '20140124',
1058 'uploader': 'New Century Foundation',
1059 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1060 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1061 },
1062 'params': {
1063 'skip_download': True,
1064 },
1065 },
1066 {
1067 # itag 212
1068 'url': '1t24XAntNCY',
1069 'only_matching': True,
1070 },
1071 {
1072 # geo restricted to JP
1073 'url': 'sJL6WA-aGkQ',
1074 'only_matching': True,
1075 },
1076 {
1077 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
1078 'only_matching': True,
1079 },
1080 {
1081 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1082 'only_matching': True,
1083 },
1084 {
1085 # DRM protected
1086 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1087 'only_matching': True,
1088 },
1089 {
1090 # Video with unsupported adaptive stream type formats
1091 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1092 'info_dict': {
1093 'id': 'Z4Vy8R84T1U',
1094 'ext': 'mp4',
1095 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1096 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1097 'duration': 433,
1098 'upload_date': '20130923',
1099 'uploader': 'Amelia Putri Harwita',
1100 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1101 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1102 'formats': 'maxcount:10',
1103 },
1104 'params': {
1105 'skip_download': True,
1106 'youtube_include_dash_manifest': False,
1107 },
1108 'skip': 'not actual anymore',
1109 },
1110 {
1111 # Youtube Music Auto-generated description
1112 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1113 'info_dict': {
1114 'id': 'MgNrAu2pzNs',
1115 'ext': 'mp4',
1116 'title': 'Voyeur Girl',
1117 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1118 'upload_date': '20190312',
1119 'uploader': 'Stephen - Topic',
1120 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1121 'artist': 'Stephen',
1122 'track': 'Voyeur Girl',
1123 'album': 'it\'s too much love to know my dear',
1124 'release_date': '20190313',
1125 'release_year': 2019,
1126 },
1127 'params': {
1128 'skip_download': True,
1129 },
1130 },
1131 {
1132 # Youtube Music Auto-generated description
1133 # Retrieve 'artist' field from 'Artist:' in video description
1134 # when it is present on youtube music video
1135 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
1136 'info_dict': {
1137 'id': 'k0jLE7tTwjY',
1138 'ext': 'mp4',
1139 'title': 'Latch Feat. Sam Smith',
1140 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
1141 'upload_date': '20150110',
1142 'uploader': 'Various Artists - Topic',
1143 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
1144 'artist': 'Disclosure',
1145 'track': 'Latch Feat. Sam Smith',
1146 'album': 'Latch Featuring Sam Smith',
1147 'release_date': '20121008',
1148 'release_year': 2012,
1149 },
1150 'params': {
1151 'skip_download': True,
1152 },
1153 },
1154 {
1155 # Youtube Music Auto-generated description
1156 # handle multiple artists on youtube music video
1157 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
1158 'info_dict': {
1159 'id': '74qn0eJSjpA',
1160 'ext': 'mp4',
1161 'title': 'Eastside',
1162 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
1163 'upload_date': '20180710',
1164 'uploader': 'Benny Blanco - Topic',
1165 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
1166 'artist': 'benny blanco, Halsey, Khalid',
1167 'track': 'Eastside',
1168 'album': 'Eastside',
1169 'release_date': '20180713',
1170 'release_year': 2018,
1171 },
1172 'params': {
1173 'skip_download': True,
1174 },
1175 },
1176 {
1177 # Youtube Music Auto-generated description
1178 # handle youtube music video with release_year and no release_date
1179 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
1180 'info_dict': {
1181 'id': '-hcAI0g-f5M',
1182 'ext': 'mp4',
1183 'title': 'Put It On Me',
1184 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
1185 'upload_date': '20180426',
1186 'uploader': 'Matt Maeson - Topic',
1187 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
1188 'artist': 'Matt Maeson',
1189 'track': 'Put It On Me',
1190 'album': 'The Hearse',
1191 'release_date': None,
1192 'release_year': 2018,
1193 },
1194 'params': {
1195 'skip_download': True,
1196 },
1197 },
1198 {
1199 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1200 'only_matching': True,
1201 },
1202 {
1203 # invalid -> valid video id redirection
1204 'url': 'DJztXj2GPfl',
1205 'info_dict': {
1206 'id': 'DJztXj2GPfk',
1207 'ext': 'mp4',
1208 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1209 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1210 'upload_date': '20090125',
1211 'uploader': 'Prochorowka',
1212 'uploader_id': 'Prochorowka',
1213 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1214 'artist': 'Panjabi MC',
1215 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1216 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1217 },
1218 'params': {
1219 'skip_download': True,
1220 },
1221 },
1222 {
1223 # empty description results in an empty string
1224 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1225 'info_dict': {
1226 'id': 'x41yOUIvK2k',
1227 'ext': 'mp4',
1228 'title': 'IMG 3456',
1229 'description': '',
1230 'upload_date': '20170613',
1231 'uploader_id': 'ElevageOrVert',
1232 'uploader': 'ElevageOrVert',
1233 },
1234 'params': {
1235 'skip_download': True,
1236 },
1237 },
1238 ]
1239
    def __init__(self, *args, **kwargs):
        """Initialize the extractor and its per-instance signature-function cache."""
        # Explicit class name keeps the super() call Python-2 compatible
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Maps (player_url, signature layout) -> decryption callable,
        # filled lazily by _decrypt_signature
        self._player_cache = {}
1243
    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen('%s: Downloading video info webpage' % video_id)
1247
    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen('%s: Extracting video information' % video_id)
1251
    def report_unavailable_format(self, video_id, format):
        """Report that the requested format is not available for video_id."""
        # NOTE: parameter name `format` shadows the builtin, but renaming it
        # would break keyword callers, so it is kept as-is
        self.to_screen('%s: Format %s not available' % (video_id, format))
1255
    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self.to_screen('RTMP download detected')
1259
1260 def _signature_cache_id(self, example_sig):
1261 """ Return a string representation of a signature """
1262 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1263
1264 @classmethod
1265 def _extract_player_info(cls, player_url):
1266 for player_re in cls._PLAYER_INFO_RE:
1267 id_m = re.search(player_re, player_url)
1268 if id_m:
1269 break
1270 else:
1271 raise ExtractorError('Cannot identify player %r' % player_url)
1272 return id_m.group('ext'), id_m.group('id')
1273
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (or load from disk cache) the signature-decryption function.

        Returns a callable mapping an encrypted signature string to its
        decrypted form.  Results are cached keyed by player type, player id
        and the signature layout (dot-joined part lengths).
        """
        player_type, player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = '%s_%s_%s' % (
            player_type, player_id, self._signature_cache_id(example_sig))
        # func_id becomes part of a cache filename; ensure it has no path parts
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a list of source-character indices; rebuild the
            # permutation function without downloading the player
            return lambda s: ''.join(s[i] for i in cache_spec)

        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading %s player %s' % (player_type, player_id)
        )
        if player_type == 'js':
            code = self._download_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            res = self._parse_sig_js(code)
        elif player_type == 'swf':
            urlh = self._request_webpage(
                player_url, video_id,
                note=download_note,
                errnote='Download of %s failed' % player_url)
            code = urlh.read()
            res = self._parse_sig_swf(code)
        else:
            assert False, 'Invalid player type %r' % player_type

        # Derive the index permutation by running the function on a probe
        # string of distinct characters, then persist it for future runs.
        # NOTE(review): this assumes the function only permutes/selects
        # characters of its input — true for all known players.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = res(test_string)
        cache_spec = [ord(c) for c in cache_res]

        self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
        return res
1313
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Used by the youtube_print_sig_code option to help embed a newly
        discovered signature algorithm directly into the extractor.
        """
        def gen_sig_code(idxs):
            # Emit compact slice expressions for runs of consecutive indices
            # (step +1 or -1) and single 's[i]' terms for everything else.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                # `end` is inclusive; Python slices are exclusive, hence end + step
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: either it continues, or flush the slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # A new consecutive run starts at prev
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the final open run.
            # NOTE(review): `i` is unbound when idxs has fewer than 2 elements;
            # signatures always have several characters, so this never triggers
            # in practice — confirm before reusing elsewhere.
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1352
    def _parse_sig_js(self, jscode):
        """Locate the signature-decryption function in the JS player code.

        Returns a callable that maps an encrypted signature string to its
        decrypted form, evaluated through the bundled JS interpreter.
        """
        # Patterns are ordered from most recent player layouts to obsolete
        # ones; the first match wins.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its argument as a one-element argument list
        return lambda s: initial_function([s])
1373
1374 def _parse_sig_swf(self, file_contents):
1375 swfi = SWFInterpreter(file_contents)
1376 TARGET_CLASSNAME = 'SignatureDecipher'
1377 searched_class = swfi.extract_class(TARGET_CLASSNAME)
1378 initial_function = swfi.extract_function(searched_class, 'decipher')
1379 return lambda s: initial_function([s])
1380
1381 def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
1382 """Turn the encrypted s field into a working signature"""
1383
1384 if player_url is None:
1385 raise ExtractorError('Cannot decrypt signature without player_url')
1386
1387 if player_url.startswith('//'):
1388 player_url = 'https:' + player_url
1389 elif not re.match(r'https?://', player_url):
1390 player_url = compat_urlparse.urljoin(
1391 'https://www.youtube.com', player_url)
1392 try:
1393 player_id = (player_url, self._signature_cache_id(s))
1394 if player_id not in self._player_cache:
1395 func = self._extract_signature_function(
1396 video_id, player_url, s
1397 )
1398 self._player_cache[player_id] = func
1399 func = self._player_cache[player_id]
1400 if self._downloader.params.get('youtube_print_sig_code'):
1401 self._print_sig_code(func, s)
1402 return func(s)
1403 except Exception as e:
1404 tb = traceback.format_exc()
1405 raise ExtractorError(
1406 'Signature extraction failed: ' + tb, cause=e)
1407
1408 def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
1409 try:
1410 subs_doc = self._download_xml(
1411 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
1412 video_id, note=False)
1413 except ExtractorError as err:
1414 self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
1415 return {}
1416
1417 sub_lang_list = {}
1418 for track in subs_doc.findall('track'):
1419 lang = track.attrib['lang_code']
1420 if lang in sub_lang_list:
1421 continue
1422 sub_formats = []
1423 for ext in self._SUBTITLE_FORMATS:
1424 params = compat_urllib_parse_urlencode({
1425 'lang': lang,
1426 'v': video_id,
1427 'fmt': ext,
1428 'name': track.attrib['name'].encode('utf-8'),
1429 })
1430 sub_formats.append({
1431 'url': 'https://www.youtube.com/api/timedtext?' + params,
1432 'ext': ext,
1433 })
1434 sub_lang_list[lang] = sub_formats
1435 if has_live_chat_replay:
1436 sub_lang_list['live_chat'] = [
1437 {
1438 'video_id': video_id,
1439 'ext': 'json',
1440 'protocol': 'youtube_live_chat_replay',
1441 },
1442 ]
1443 if not sub_lang_list:
1444 self._downloader.report_warning('video doesn\'t have subtitles')
1445 return {}
1446 return sub_lang_list
1447
1448 def _get_ytplayer_config(self, video_id, webpage):
1449 patterns = (
1450 # User data may contain arbitrary character sequences that may affect
1451 # JSON extraction with regex, e.g. when '};' is contained the second
1452 # regex won't capture the whole JSON. Yet working around by trying more
1453 # concrete regex first keeping in mind proper quoted string handling
1454 # to be implemented in future that will replace this workaround (see
1455 # https://github.com/ytdl-org/youtube-dl/issues/7468,
1456 # https://github.com/ytdl-org/youtube-dl/pull/7599)
1457 r';ytplayer\.config\s*=\s*({.+?});ytplayer',
1458 r';ytplayer\.config\s*=\s*({.+?});',
1459 r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
1460 )
1461 config = self._search_regex(
1462 patterns, webpage, 'ytplayer.config', default=None)
1463 if config:
1464 return self._parse_json(
1465 uppercase_escape(config), video_id, fatal=False)
1466
1467 def _get_music_metadata_from_yt_initial(self, yt_initial):
1468 music_metadata = []
1469 key_map = {
1470 'Album': 'album',
1471 'Artist': 'artist',
1472 'Song': 'track'
1473 }
1474 contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
1475 if type(contents) is list:
1476 for content in contents:
1477 music_track = {}
1478 if type(content) is not dict:
1479 continue
1480 videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
1481 if type(videoSecondaryInfoRenderer) is not dict:
1482 continue
1483 rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
1484 if type(rows) is not list:
1485 continue
1486 for row in rows:
1487 metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
1488 if type(metadataRowRenderer) is not dict:
1489 continue
1490 key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
1491 value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
1492 try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
1493 if type(key) is not str or type(value) is not str:
1494 continue
1495 if key in key_map:
1496 if key_map[key] in music_track:
1497 # we've started on a new track
1498 music_metadata.append(music_track)
1499 music_track = {}
1500 music_track[key_map[key]] = value
1501 if len(music_track.keys()):
1502 music_metadata.append(music_track)
1503 return music_metadata
1504
    def _get_automatic_captions(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process.

        Returns automatic (ASR) caption tracks as {lang: [format dicts]}.
        Three historical mechanisms exist: the legacy args.ttsurl API, the
        player_response captions renderer, and an obsolete caption_tracks
        path (now unreachable, see note below)."""
        self.to_screen('%s: Looking for automatic captions' % video_id)
        player_config = self._get_ytplayer_config(video_id, webpage)
        err_msg = 'Couldn\'t find automatic captions for %s' % video_id
        if not player_config:
            self._downloader.report_warning(err_msg)
            return {}
        try:
            # Legacy (flash-era) mechanism: args.ttsurl points directly at
            # the timedtext service
            if "args" in player_config and "ttsurl" in player_config["args"]:
                args = player_config['args']
                caption_url = args['ttsurl']
                timestamp = args['timestamp']

                # We get the available subtitles
                list_params = compat_urllib_parse_urlencode({
                    'type': 'list',
                    'tlangs': 1,
                    'asrs': 1,
                })
                list_url = caption_url + '&' + list_params
                caption_list = self._download_xml(list_url, video_id)
                original_lang_node = caption_list.find('track')
                if original_lang_node is None:
                    self._downloader.report_warning('Video doesn\'t have automatic captions')
                    return {}
                original_lang = original_lang_node.attrib['lang_code']
                caption_kind = original_lang_node.attrib.get('kind', '')

                # One entry per translation target; each track is offered in
                # every supported subtitle format
                sub_lang_list = {}
                for lang_node in caption_list.findall('target'):
                    sub_lang = lang_node.attrib['lang_code']
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        params = compat_urllib_parse_urlencode({
                            'lang': original_lang,
                            'tlang': sub_lang,
                            'fmt': ext,
                            'ts': timestamp,
                            'kind': caption_kind,
                        })
                        sub_formats.append({
                            'url': caption_url + '&' + params,
                            'ext': ext,
                        })
                    sub_lang_list[sub_lang] = sub_formats
                return sub_lang_list

            def make_captions(sub_url, sub_langs):
                # Build {lang: [format dicts]} by rewriting the base caption
                # URL's query string per language/format combination
                parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
                caption_qs = compat_parse_qs(parsed_sub_url.query)
                captions = {}
                for sub_lang in sub_langs:
                    sub_formats = []
                    for ext in self._SUBTITLE_FORMATS:
                        caption_qs.update({
                            'tlang': [sub_lang],
                            'fmt': [ext],
                        })
                        sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
                            query=compat_urllib_parse_urlencode(caption_qs, True)))
                        sub_formats.append({
                            'url': sub_url,
                            'ext': ext,
                        })
                    captions[sub_lang] = sub_formats
                return captions

            # New captions format as of 22.06.2017
            if "args" in player_config:
                player_response = player_config["args"].get('player_response')
            else:
                # New player system (ytInitialPlayerResponse) as of October 2020
                player_response = player_config

            if player_response:
                if isinstance(player_response, compat_str):
                    player_response = self._parse_json(
                        player_response, video_id, fatal=False)

                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
                caption_tracks = renderer['captionTracks']
                for caption_track in caption_tracks:
                    if 'kind' not in caption_track:
                        # not an automatic transcription
                        continue
                    base_url = caption_track['baseUrl']
                    sub_lang_list = []
                    for lang in renderer['translationLanguages']:
                        lang_code = lang.get('languageCode')
                        if lang_code:
                            sub_lang_list.append(lang_code)
                    return make_captions(base_url, sub_lang_list)

            self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
            return {}

            # NOTE(review): the code below is unreachable — every path above
            # either returns or raises before this point.  Kept only as
            # documentation of the obsolete caption_tracks mechanism.
            if "args" in player_config:
                args = player_config["args"]

                # Some videos don't provide ttsurl but rather caption_tracks and
                # caption_translation_languages (e.g. 20LmZk1hakA)
                # Does not used anymore as of 22.06.2017
                caption_tracks = args['caption_tracks']
                caption_translation_languages = args['caption_translation_languages']
                caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
                sub_lang_list = []
                for lang in caption_translation_languages.split(','):
                    lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
                    sub_lang = lang_qs.get('lc', [None])[0]
                    if sub_lang:
                        sub_lang_list.append(sub_lang)
                return make_captions(caption_url, sub_lang_list)
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, IndexError, ExtractorError):
            self._downloader.report_warning(err_msg)
            return {}
1624
1625 def _mark_watched(self, video_id, video_info, player_response):
1626 playback_url = url_or_none(try_get(
1627 player_response,
1628 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
1629 video_info, lambda x: x['videostats_playback_base_url'][0]))
1630 if not playback_url:
1631 return
1632 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1633 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1634
1635 # cpn generation algorithm is reverse engineered from base.js.
1636 # In fact it works even with dummy cpn.
1637 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1638 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1639
1640 qs.update({
1641 'ver': ['2'],
1642 'cpn': [cpn],
1643 })
1644 playback_url = compat_urlparse.urlunparse(
1645 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1646
1647 self._download_webpage(
1648 playback_url, video_id, 'Marking watched',
1649 'Unable to mark watched', fatal=False)
1650
1651 @staticmethod
1652 def _extract_urls(webpage):
1653 # Embedded YouTube player
1654 entries = [
1655 unescapeHTML(mobj.group('url'))
1656 for mobj in re.finditer(r'''(?x)
1657 (?:
1658 <iframe[^>]+?src=|
1659 data-video-url=|
1660 <embed[^>]+?src=|
1661 embedSWF\(?:\s*|
1662 <object[^>]+data=|
1663 new\s+SWFObject\(
1664 )
1665 (["\'])
1666 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1667 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1668 \1''', webpage)]
1669
1670 # lazyYT YouTube embed
1671 entries.extend(list(map(
1672 unescapeHTML,
1673 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1674
1675 # Wordpress "YouTube Video Importer" plugin
1676 matches = re.findall(r'''(?x)<div[^>]+
1677 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1678 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1679 entries.extend(m[-1] for m in matches)
1680
1681 return entries
1682
1683 @staticmethod
1684 def _extract_url(webpage):
1685 urls = YoutubeIE._extract_urls(webpage)
1686 return urls[0] if urls else None
1687
1688 @classmethod
1689 def extract_id(cls, url):
1690 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1691 if mobj is None:
1692 raise ExtractorError('Invalid URL: %s' % url)
1693 video_id = mobj.group(2)
1694 return video_id
1695
1696 def _extract_chapters_from_json(self, webpage, video_id, duration):
1697 if not webpage:
1698 return
1699 initial_data = self._parse_json(
1700 self._search_regex(
1701 r'window\["ytInitialData"\] = (.+);\n', webpage,
1702 'player args', default='{}'),
1703 video_id, fatal=False)
1704 if not initial_data or not isinstance(initial_data, dict):
1705 return
1706 chapters_list = try_get(
1707 initial_data,
1708 lambda x: x['playerOverlays']
1709 ['playerOverlayRenderer']
1710 ['decoratedPlayerBarRenderer']
1711 ['decoratedPlayerBarRenderer']
1712 ['playerBar']
1713 ['chapteredPlayerBarRenderer']
1714 ['chapters'],
1715 list)
1716 if not chapters_list:
1717 return
1718
1719 def chapter_time(chapter):
1720 return float_or_none(
1721 try_get(
1722 chapter,
1723 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1724 int),
1725 scale=1000)
1726 chapters = []
1727 for next_num, chapter in enumerate(chapters_list, start=1):
1728 start_time = chapter_time(chapter)
1729 if start_time is None:
1730 continue
1731 end_time = (chapter_time(chapters_list[next_num])
1732 if next_num < len(chapters_list) else duration)
1733 if end_time is None:
1734 continue
1735 title = try_get(
1736 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1737 compat_str)
1738 chapters.append({
1739 'start_time': start_time,
1740 'end_time': end_time,
1741 'title': title,
1742 })
1743 return chapters
1744
1745 @staticmethod
1746 def _extract_chapters_from_description(description, duration):
1747 if not description:
1748 return None
1749 chapter_lines = re.findall(
1750 r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
1751 description)
1752 if not chapter_lines:
1753 return None
1754 chapters = []
1755 for next_num, (chapter_line, time_point) in enumerate(
1756 chapter_lines, start=1):
1757 start_time = parse_duration(time_point)
1758 if start_time is None:
1759 continue
1760 if start_time > duration:
1761 break
1762 end_time = (duration if next_num == len(chapter_lines)
1763 else parse_duration(chapter_lines[next_num][1]))
1764 if end_time is None:
1765 continue
1766 if end_time > duration:
1767 end_time = duration
1768 if start_time > end_time:
1769 break
1770 chapter_title = re.sub(
1771 r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
1772 chapter_title = re.sub(r'\s+', ' ', chapter_title)
1773 chapters.append({
1774 'start_time': start_time,
1775 'end_time': end_time,
1776 'title': chapter_title,
1777 })
1778 return chapters
1779
1780 def _extract_chapters(self, webpage, description, video_id, duration):
1781 return (self._extract_chapters_from_json(webpage, video_id, duration)
1782 or self._extract_chapters_from_description(description, duration))
1783
1784 def _real_extract(self, url):
1785 url, smuggled_data = unsmuggle_url(url, {})
1786
1787 proto = (
1788 'http' if self._downloader.params.get('prefer_insecure', False)
1789 else 'https')
1790
1791 start_time = None
1792 end_time = None
1793 parsed_url = compat_urllib_parse_urlparse(url)
1794 for component in [parsed_url.fragment, parsed_url.query]:
1795 query = compat_parse_qs(component)
1796 if start_time is None and 't' in query:
1797 start_time = parse_duration(query['t'][0])
1798 if start_time is None and 'start' in query:
1799 start_time = parse_duration(query['start'][0])
1800 if end_time is None and 'end' in query:
1801 end_time = parse_duration(query['end'][0])
1802
1803 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
1804 mobj = re.search(self._NEXT_URL_RE, url)
1805 if mobj:
1806 url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
1807 video_id = self.extract_id(url)
1808
1809 # Get video webpage
1810 url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
1811 video_webpage, urlh = self._download_webpage_handle(url, video_id)
1812
1813 qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
1814 video_id = qs.get('v', [None])[0] or video_id
1815
1816 # Attempt to extract SWF player URL
1817 mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1818 if mobj is not None:
1819 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1820 else:
1821 player_url = None
1822
1823 dash_mpds = []
1824
1825 def add_dash_mpd(video_info):
1826 dash_mpd = video_info.get('dashmpd')
1827 if dash_mpd and dash_mpd[0] not in dash_mpds:
1828 dash_mpds.append(dash_mpd[0])
1829
1830 def add_dash_mpd_pr(pl_response):
1831 dash_mpd = url_or_none(try_get(
1832 pl_response, lambda x: x['streamingData']['dashManifestUrl'],
1833 compat_str))
1834 if dash_mpd and dash_mpd not in dash_mpds:
1835 dash_mpds.append(dash_mpd)
1836
1837 is_live = None
1838 view_count = None
1839
1840 def extract_view_count(v_info):
1841 return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
1842
1843 def extract_player_response(player_response, video_id):
1844 pl_response = str_or_none(player_response)
1845 if not pl_response:
1846 return
1847 pl_response = self._parse_json(pl_response, video_id, fatal=False)
1848 if isinstance(pl_response, dict):
1849 add_dash_mpd_pr(pl_response)
1850 return pl_response
1851
1852 def extract_embedded_config(embed_webpage, video_id):
1853 embedded_config = self._search_regex(
1854 r'setConfig\(({.*})\);',
1855 embed_webpage, 'ytInitialData', default=None)
1856 if embedded_config:
1857 return embedded_config
1858
1859 player_response = {}
1860
1861 # Get video info
1862 video_info = {}
1863 embed_webpage = None
1864 if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
1865 or re.search(r'player-age-gate-content">', video_webpage) is not None):
1866 cookie_keys = self._get_cookies('https://www.youtube.com').keys()
1867 age_gate = True
1868 # We simulate the access to the video from www.youtube.com/v/{video_id}
1869 # this can be viewed without login into Youtube
1870 url = proto + '://www.youtube.com/embed/%s' % video_id
1871 embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
1872 ext = extract_embedded_config(embed_webpage, video_id)
1873 # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
1874 playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
1875 if not playable_in_embed:
1876 self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
1877 playable_in_embed = ''
1878 else:
1879 playable_in_embed = playable_in_embed.group('playableinEmbed')
1880 # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
1881 # if re.search(r'player-unavailable">', embed_webpage) is not None:
1882 if playable_in_embed == 'false':
1883 '''
1884 # TODO apply this patch when Support for Python 2.6(!) and above drops
1885 if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
1886 or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
1887 '''
1888 if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
1889 or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
1890 age_gate = False
1891 # Try looking directly into the video webpage
1892 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1893 if ytplayer_config:
1894 args = ytplayer_config.get("args")
1895 if args is not None:
1896 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1897 # Convert to the same format returned by compat_parse_qs
1898 video_info = dict((k, [v]) for k, v in args.items())
1899 add_dash_mpd(video_info)
1900 # Rental video is not rented but preview is available (e.g.
1901 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1902 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1903 if not video_info and args.get('ypc_vid'):
1904 return self.url_result(
1905 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1906 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1907 is_live = True
1908 if not player_response:
1909 player_response = extract_player_response(args.get('player_response'), video_id)
1910 elif not player_response:
1911 player_response = ytplayer_config
1912 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1913 add_dash_mpd_pr(player_response)
1914 else:
1915 raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
1916 else:
1917 data = compat_urllib_parse_urlencode({
1918 'video_id': video_id,
1919 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1920 'sts': self._search_regex(
1921 r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
1922 })
1923 video_info_url = proto + '://www.youtube.com/get_video_info?' + data
1924 try:
1925 video_info_webpage = self._download_webpage(
1926 video_info_url, video_id,
1927 note='Refetching age-gated info webpage',
1928 errnote='unable to download video info webpage')
1929 except ExtractorError:
1930 video_info_webpage = None
1931 if video_info_webpage:
1932 video_info = compat_parse_qs(video_info_webpage)
1933 pl_response = video_info.get('player_response', [None])[0]
1934 player_response = extract_player_response(pl_response, video_id)
1935 add_dash_mpd(video_info)
1936 view_count = extract_view_count(video_info)
1937 else:
1938 age_gate = False
1939 # Try looking directly into the video webpage
1940 ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
1941 args = ytplayer_config.get("args")
1942 if args is not None:
1943 if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
1944 # Convert to the same format returned by compat_parse_qs
1945 video_info = dict((k, [v]) for k, v in args.items())
1946 add_dash_mpd(video_info)
1947 # Rental video is not rented but preview is available (e.g.
1948 # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
1949 # https://github.com/ytdl-org/youtube-dl/issues/10532)
1950 if not video_info and args.get('ypc_vid'):
1951 return self.url_result(
1952 args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
1953 if args.get('livestream') == '1' or args.get('live_playback') == 1:
1954 is_live = True
1955 if not player_response:
1956 player_response = extract_player_response(args.get('player_response'), video_id)
1957 elif not player_response:
1958 player_response = ytplayer_config
1959 if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
1960 add_dash_mpd_pr(player_response)
1961
1962 def extract_unavailable_message():
1963 messages = []
1964 for tag, kind in (('h1', 'message'), ('div', 'submessage')):
1965 msg = self._html_search_regex(
1966 r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
1967 video_webpage, 'unavailable %s' % kind, default=None)
1968 if msg:
1969 messages.append(msg)
1970 if messages:
1971 return '\n'.join(messages)
1972
1973 if not video_info and not player_response:
1974 unavailable_message = extract_unavailable_message()
1975 if not unavailable_message:
1976 unavailable_message = 'Unable to extract video data'
1977 raise ExtractorError(
1978 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
1979
1980 if not isinstance(video_info, dict):
1981 video_info = {}
1982
1983 video_details = try_get(
1984 player_response, lambda x: x['videoDetails'], dict) or {}
1985
1986 microformat = try_get(
1987 player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
1988
1989 video_title = video_info.get('title', [None])[0] or video_details.get('title')
1990 if not video_title:
1991 self._downloader.report_warning('Unable to extract video title')
1992 video_title = '_'
1993
1994 description_original = video_description = get_element_by_id("eow-description", video_webpage)
1995 if video_description:
1996
1997 def replace_url(m):
1998 redir_url = compat_urlparse.urljoin(url, m.group(1))
1999 parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
2000 if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
2001 qs = compat_parse_qs(parsed_redir_url.query)
2002 q = qs.get('q')
2003 if q and q[0]:
2004 return q[0]
2005 return redir_url
2006
2007 description_original = video_description = re.sub(r'''(?x)
2008 <a\s+
2009 (?:[a-zA-Z-]+="[^"]*"\s+)*?
2010 (?:title|href)="([^"]+)"\s+
2011 (?:[a-zA-Z-]+="[^"]*"\s+)*?
2012 class="[^"]*"[^>]*>
2013 [^<]+\.{3}\s*
2014 </a>
2015 ''', replace_url, video_description)
2016 video_description = clean_html(video_description)
2017 else:
2018 video_description = video_details.get('shortDescription')
2019 if video_description is None:
2020 video_description = self._html_search_meta('description', video_webpage)
2021
2022 if not smuggled_data.get('force_singlefeed', False):
2023 if not self._downloader.params.get('noplaylist'):
2024 multifeed_metadata_list = try_get(
2025 player_response,
2026 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2027 compat_str) or try_get(
2028 video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
2029 if multifeed_metadata_list:
2030 entries = []
2031 feed_ids = []
2032 for feed in multifeed_metadata_list.split(','):
2033 # Unquote should take place before split on comma (,) since textual
2034 # fields may contain comma as well (see
2035 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2036 feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
2037
2038 def feed_entry(name):
2039 return try_get(feed_data, lambda x: x[name][0], compat_str)
2040
2041 feed_id = feed_entry('id')
2042 if not feed_id:
2043 continue
2044 feed_title = feed_entry('title')
2045 title = video_title
2046 if feed_title:
2047 title += ' (%s)' % feed_title
2048 entries.append({
2049 '_type': 'url_transparent',
2050 'ie_key': 'Youtube',
2051 'url': smuggle_url(
2052 '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
2053 {'force_singlefeed': True}),
2054 'title': title,
2055 })
2056 feed_ids.append(feed_id)
2057 self.to_screen(
2058 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2059 % (', '.join(feed_ids), video_id))
2060 return self.playlist_result(entries, video_id, video_title, video_description)
2061 else:
2062 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2063
2064 if view_count is None:
2065 view_count = extract_view_count(video_info)
2066 if view_count is None and video_details:
2067 view_count = int_or_none(video_details.get('viewCount'))
2068 if view_count is None and microformat:
2069 view_count = int_or_none(microformat.get('viewCount'))
2070
2071 if is_live is None:
2072 is_live = bool_or_none(video_details.get('isLive'))
2073
2074 has_live_chat_replay = False
2075 if not is_live:
2076 yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
2077 try:
2078 yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2079 has_live_chat_replay = True
2080 except (KeyError, IndexError, TypeError):
2081 pass
2082
2083 # Check for "rental" videos
2084 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
2085 raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
2086
2087 def _extract_filesize(media_url):
2088 return int_or_none(self._search_regex(
2089 r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
2090
2091 streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
2092 streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
2093
2094 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
2095 self.report_rtmp_download()
2096 formats = [{
2097 'format_id': '_rtmp',
2098 'protocol': 'rtmp',
2099 'url': video_info['conn'][0],
2100 'player_url': player_url,
2101 }]
2102 elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
2103 encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
2104 if 'rtmpe%3Dyes' in encoded_url_map:
2105 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
2106 formats = []
2107 formats_spec = {}
2108 fmt_list = video_info.get('fmt_list', [''])[0]
2109 if fmt_list:
2110 for fmt in fmt_list.split(','):
2111 spec = fmt.split('/')
2112 if len(spec) > 1:
2113 width_height = spec[1].split('x')
2114 if len(width_height) == 2:
2115 formats_spec[spec[0]] = {
2116 'resolution': spec[1],
2117 'width': int_or_none(width_height[0]),
2118 'height': int_or_none(width_height[1]),
2119 }
2120 for fmt in streaming_formats:
2121 itag = str_or_none(fmt.get('itag'))
2122 if not itag:
2123 continue
2124 quality = fmt.get('quality')
2125 quality_label = fmt.get('qualityLabel') or quality
2126 formats_spec[itag] = {
2127 'asr': int_or_none(fmt.get('audioSampleRate')),
2128 'filesize': int_or_none(fmt.get('contentLength')),
2129 'format_note': quality_label,
2130 'fps': int_or_none(fmt.get('fps')),
2131 'height': int_or_none(fmt.get('height')),
2132 # bitrate for itag 43 is always 2147483647
2133 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
2134 'width': int_or_none(fmt.get('width')),
2135 }
2136
2137 for fmt in streaming_formats:
2138 if fmt.get('drmFamilies') or fmt.get('drm_families'):
2139 continue
2140 url = url_or_none(fmt.get('url'))
2141
2142 if not url:
2143 cipher = fmt.get('cipher') or fmt.get('signatureCipher')
2144 if not cipher:
2145 continue
2146 url_data = compat_parse_qs(cipher)
2147 url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
2148 if not url:
2149 continue
2150 else:
2151 cipher = None
2152 url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
2153
2154 stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
2155 # Unsupported FORMAT_STREAM_TYPE_OTF
2156 if stream_type == 3:
2157 continue
2158
2159 format_id = fmt.get('itag') or url_data['itag'][0]
2160 if not format_id:
2161 continue
2162 format_id = compat_str(format_id)
2163
2164 if cipher:
2165 if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
2166 ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
2167 jsplayer_url_json = self._search_regex(
2168 ASSETS_RE,
2169 embed_webpage if age_gate else video_webpage,
2170 'JS player URL (1)', default=None)
2171 if not jsplayer_url_json and not age_gate:
2172 # We need the embed website after all
2173 if embed_webpage is None:
2174 embed_url = proto + '://www.youtube.com/embed/%s' % video_id
2175 embed_webpage = self._download_webpage(
2176 embed_url, video_id, 'Downloading embed webpage')
2177 jsplayer_url_json = self._search_regex(
2178 ASSETS_RE, embed_webpage, 'JS player URL')
2179
2180 player_url = json.loads(jsplayer_url_json)
2181 if player_url is None:
2182 player_url_json = self._search_regex(
2183 r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
2184 video_webpage, 'age gate player URL')
2185 player_url = json.loads(player_url_json)
2186
2187 if 'sig' in url_data:
2188 url += '&signature=' + url_data['sig'][0]
2189 elif 's' in url_data:
2190 encrypted_sig = url_data['s'][0]
2191
2192 if self._downloader.params.get('verbose'):
2193 if player_url is None:
2194 player_desc = 'unknown'
2195 else:
2196 player_type, player_version = self._extract_player_info(player_url)
2197 player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
2198 parts_sizes = self._signature_cache_id(encrypted_sig)
2199 self.to_screen('{%s} signature length %s, %s' %
2200 (format_id, parts_sizes, player_desc))
2201
2202 signature = self._decrypt_signature(
2203 encrypted_sig, video_id, player_url, age_gate)
2204 sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
2205 url += '&%s=%s' % (sp, signature)
2206 if 'ratebypass' not in url:
2207 url += '&ratebypass=yes'
2208
2209 dct = {
2210 'format_id': format_id,
2211 'url': url,
2212 'player_url': player_url,
2213 }
2214 if format_id in self._formats:
2215 dct.update(self._formats[format_id])
2216 if format_id in formats_spec:
2217 dct.update(formats_spec[format_id])
2218
2219 # Some itags are not included in DASH manifest thus corresponding formats will
2220 # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
2221 # Trying to extract metadata from url_encoded_fmt_stream_map entry.
2222 mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
2223 width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
2224
2225 if width is None:
2226 width = int_or_none(fmt.get('width'))
2227 if height is None:
2228 height = int_or_none(fmt.get('height'))
2229
2230 filesize = int_or_none(url_data.get(
2231 'clen', [None])[0]) or _extract_filesize(url)
2232
2233 quality = url_data.get('quality', [None])[0] or fmt.get('quality')
2234 quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
2235
2236 tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
2237 or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
2238 fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
2239
2240 more_fields = {
2241 'filesize': filesize,
2242 'tbr': tbr,
2243 'width': width,
2244 'height': height,
2245 'fps': fps,
2246 'format_note': quality_label or quality,
2247 }
2248 for key, value in more_fields.items():
2249 if value:
2250 dct[key] = value
2251 type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
2252 if type_:
2253 type_split = type_.split(';')
2254 kind_ext = type_split[0].split('/')
2255 if len(kind_ext) == 2:
2256 kind, _ = kind_ext
2257 dct['ext'] = mimetype2ext(type_split[0])
2258 if kind in ('audio', 'video'):
2259 codecs = None
2260 for mobj in re.finditer(
2261 r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
2262 if mobj.group('key') == 'codecs':
2263 codecs = mobj.group('val')
2264 break
2265 if codecs:
2266 dct.update(parse_codecs(codecs))
2267 if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
2268 dct['downloader_options'] = {
2269 # Youtube throttles chunks >~10M
2270 'http_chunk_size': 10485760,
2271 }
2272 formats.append(dct)
2273 else:
2274 manifest_url = (
2275 url_or_none(try_get(
2276 player_response,
2277 lambda x: x['streamingData']['hlsManifestUrl'],
2278 compat_str))
2279 or url_or_none(try_get(
2280 video_info, lambda x: x['hlsvp'][0], compat_str)))
2281 if manifest_url:
2282 formats = []
2283 m3u8_formats = self._extract_m3u8_formats(
2284 manifest_url, video_id, 'mp4', fatal=False)
2285 for a_format in m3u8_formats:
2286 itag = self._search_regex(
2287 r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
2288 if itag:
2289 a_format['format_id'] = itag
2290 if itag in self._formats:
2291 dct = self._formats[itag].copy()
2292 dct.update(a_format)
2293 a_format = dct
2294 a_format['player_url'] = player_url
2295 # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
2296 a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
2297 if self._downloader.params.get('youtube_include_hls_manifest', True):
2298 formats.append(a_format)
2299 else:
2300 error_message = extract_unavailable_message()
2301 if not error_message:
2302 error_message = clean_html(try_get(
2303 player_response, lambda x: x['playabilityStatus']['reason'],
2304 compat_str))
2305 if not error_message:
2306 error_message = clean_html(
2307 try_get(video_info, lambda x: x['reason'][0], compat_str))
2308 if error_message:
2309 raise ExtractorError(error_message, expected=True)
2310 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
2311
2312 # uploader
2313 video_uploader = try_get(
2314 video_info, lambda x: x['author'][0],
2315 compat_str) or str_or_none(video_details.get('author'))
2316 if video_uploader:
2317 video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
2318 else:
2319 self._downloader.report_warning('unable to extract uploader name')
2320
2321 # uploader_id
2322 video_uploader_id = None
2323 video_uploader_url = None
2324 mobj = re.search(
2325 r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
2326 video_webpage)
2327 if mobj is not None:
2328 video_uploader_id = mobj.group('uploader_id')
2329 video_uploader_url = mobj.group('uploader_url')
2330 else:
2331 owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
2332 if owner_profile_url:
2333 video_uploader_id = self._search_regex(
2334 r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
2335 default=None)
2336 video_uploader_url = owner_profile_url
2337
2338 channel_id = (
2339 str_or_none(video_details.get('channelId'))
2340 or self._html_search_meta(
2341 'channelId', video_webpage, 'channel id', default=None)
2342 or self._search_regex(
2343 r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
2344 video_webpage, 'channel id', default=None, group='id'))
2345 channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
2346
2347 thumbnails = []
2348 thumbnails_list = try_get(
2349 video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
2350 for t in thumbnails_list:
2351 if not isinstance(t, dict):
2352 continue
2353 thumbnail_url = url_or_none(t.get('url'))
2354 if not thumbnail_url:
2355 continue
2356 thumbnails.append({
2357 'url': thumbnail_url,
2358 'width': int_or_none(t.get('width')),
2359 'height': int_or_none(t.get('height')),
2360 })
2361
2362 if not thumbnails:
2363 video_thumbnail = None
2364 # We try first to get a high quality image:
2365 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
2366 video_webpage, re.DOTALL)
2367 if m_thumb is not None:
2368 video_thumbnail = m_thumb.group(1)
2369 thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
2370 if thumbnail_url:
2371 video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
2372 if video_thumbnail:
2373 thumbnails.append({'url': video_thumbnail})
2374
2375 # upload date
2376 upload_date = self._html_search_meta(
2377 'datePublished', video_webpage, 'upload date', default=None)
2378 if not upload_date:
2379 upload_date = self._search_regex(
2380 [r'(?s)id="eow-date.*?>(.*?)</span>',
2381 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
2382 video_webpage, 'upload date', default=None)
2383 if not upload_date:
2384 upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
2385 upload_date = unified_strdate(upload_date)
2386
2387 video_license = self._html_search_regex(
2388 r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
2389 video_webpage, 'license', default=None)
2390
2391 m_music = re.search(
2392 r'''(?x)
2393 <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
2394 <ul[^>]*>\s*
2395 <li>(?P<title>.+?)
2396 by (?P<creator>.+?)
2397 (?:
2398 \(.+?\)|
2399 <a[^>]*
2400 (?:
2401 \bhref=["\']/red[^>]*>| # drop possible
2402 >\s*Listen ad-free with YouTube Red # YouTube Red ad
2403 )
2404 .*?
2405 )?</li
2406 ''',
2407 video_webpage)
2408 if m_music:
2409 video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
2410 video_creator = clean_html(m_music.group('creator'))
2411 else:
2412 video_alt_title = video_creator = None
2413
2414 def extract_meta(field):
2415 return self._html_search_regex(
2416 r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
2417 video_webpage, field, default=None)
2418
2419 track = extract_meta('Song')
2420 artist = extract_meta('Artist')
2421 album = extract_meta('Album')
2422
2423 # Youtube Music Auto-generated description
2424 release_date = release_year = None
2425 if video_description:
2426 mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
2427 if mobj:
2428 if not track:
2429 track = mobj.group('track').strip()
2430 if not artist:
2431 artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
2432 if not album:
2433 album = mobj.group('album'.strip())
2434 release_year = mobj.group('release_year')
2435 release_date = mobj.group('release_date')
2436 if release_date:
2437 release_date = release_date.replace('-', '')
2438 if not release_year:
2439 release_year = int(release_date[:4])
2440 if release_year:
2441 release_year = int(release_year)
2442
2443 yt_initial = self._get_yt_initial_data(video_id, video_webpage)
2444 if yt_initial:
2445 music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
2446 if len(music_metadata):
2447 album = music_metadata[0].get('album')
2448 artist = music_metadata[0].get('artist')
2449 track = music_metadata[0].get('track')
2450
2451 m_episode = re.search(
2452 r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
2453 video_webpage)
2454 if m_episode:
2455 series = unescapeHTML(m_episode.group('series'))
2456 season_number = int(m_episode.group('season'))
2457 episode_number = int(m_episode.group('episode'))
2458 else:
2459 series = season_number = episode_number = None
2460
2461 m_cat_container = self._search_regex(
2462 r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
2463 video_webpage, 'categories', default=None)
2464 category = None
2465 if m_cat_container:
2466 category = self._html_search_regex(
2467 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
2468 default=None)
2469 if not category:
2470 category = try_get(
2471 microformat, lambda x: x['category'], compat_str)
2472 video_categories = None if category is None else [category]
2473
2474 video_tags = [
2475 unescapeHTML(m.group('content'))
2476 for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
2477 if not video_tags:
2478 video_tags = try_get(video_details, lambda x: x['keywords'], list)
2479
2480 def _extract_count(count_name):
2481 return str_to_int(self._search_regex(
2482 r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
2483 % re.escape(count_name),
2484 video_webpage, count_name, default=None))
2485
2486 like_count = _extract_count('like')
2487 dislike_count = _extract_count('dislike')
2488
2489 if view_count is None:
2490 view_count = str_to_int(self._search_regex(
2491 r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
2492 'view count', default=None))
2493
2494 average_rating = (
2495 float_or_none(video_details.get('averageRating'))
2496 or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
2497
2498 # subtitles
2499 video_subtitles = self.extract_subtitles(
2500 video_id, video_webpage, has_live_chat_replay)
2501 automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
2502
2503 video_duration = try_get(
2504 video_info, lambda x: int_or_none(x['length_seconds'][0]))
2505 if not video_duration:
2506 video_duration = int_or_none(video_details.get('lengthSeconds'))
2507 if not video_duration:
2508 video_duration = parse_duration(self._html_search_meta(
2509 'duration', video_webpage, 'video duration'))
2510
2511 # Get Subscriber Count of channel
2512 subscriber_count = parse_count(self._search_regex(
2513 r'"text":"([\d\.]+\w?) subscribers"',
2514 video_webpage,
2515 'subscriber count',
2516 default=None
2517 ))
2518
2519 # annotations
2520 video_annotations = None
2521 if self._downloader.params.get('writeannotations', False):
2522 xsrf_token = self._search_regex(
2523 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
2524 video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
2525 invideo_url = try_get(
2526 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2527 if xsrf_token and invideo_url:
2528 xsrf_field_name = self._search_regex(
2529 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2530 video_webpage, 'xsrf field name',
2531 group='xsrf_field_name', default='session_token')
2532 video_annotations = self._download_webpage(
2533 self._proto_relative_url(invideo_url),
2534 video_id, note='Downloading annotations',
2535 errnote='Unable to download video annotations', fatal=False,
2536 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2537
2538 chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
2539
2540 # Look for the DASH manifest
2541 if self._downloader.params.get('youtube_include_dash_manifest', True):
2542 dash_mpd_fatal = True
2543 for mpd_url in dash_mpds:
2544 dash_formats = {}
2545 try:
2546 def decrypt_sig(mobj):
2547 s = mobj.group(1)
2548 dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
2549 return '/signature/%s' % dec_s
2550
2551 mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
2552
2553 for df in self._extract_mpd_formats(
2554 mpd_url, video_id, fatal=dash_mpd_fatal,
2555 formats_dict=self._formats):
2556 if not df.get('filesize'):
2557 df['filesize'] = _extract_filesize(df['url'])
2558 # Do not overwrite DASH format found in some previous DASH manifest
2559 if df['format_id'] not in dash_formats:
2560 dash_formats[df['format_id']] = df
2561 # Additional DASH manifests may end up in HTTP Error 403 therefore
2562 # allow them to fail without bug report message if we already have
2563 # some DASH manifest succeeded. This is temporary workaround to reduce
2564 # burst of bug reports until we figure out the reason and whether it
2565 # can be fixed at all.
2566 dash_mpd_fatal = False
2567 except (ExtractorError, KeyError) as e:
2568 self.report_warning(
2569 'Skipping DASH manifest: %r' % e, video_id)
2570 if dash_formats:
2571 # Remove the formats we found through non-DASH, they
2572 # contain less info and it can be wrong, because we use
2573 # fixed values (for example the resolution). See
2574 # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
2575 # example.
2576 formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
2577 formats.extend(dash_formats.values())
2578
2579 # Check for malformed aspect ratio
2580 stretched_m = re.search(
2581 r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
2582 video_webpage)
2583 if stretched_m:
2584 w = float(stretched_m.group('w'))
2585 h = float(stretched_m.group('h'))
2586 # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
2587 # We will only process correct ratios.
2588 if w > 0 and h > 0:
2589 ratio = w / h
2590 for f in formats:
2591 if f.get('vcodec') != 'none':
2592 f['stretched_ratio'] = ratio
2593
2594 if not formats:
2595 if 'reason' in video_info:
2596 if 'The uploader has not made this video available in your country.' in video_info['reason']:
2597 regions_allowed = self._html_search_meta(
2598 'regionsAllowed', video_webpage, default=None)
2599 countries = regions_allowed.split(',') if regions_allowed else None
2600 self.raise_geo_restricted(
2601 msg=video_info['reason'][0], countries=countries)
2602 reason = video_info['reason'][0]
2603 if 'Invalid parameters' in reason:
2604 unavailable_message = extract_unavailable_message()
2605 if unavailable_message:
2606 reason = unavailable_message
2607 raise ExtractorError(
2608 'YouTube said: %s' % reason,
2609 expected=True, video_id=video_id)
2610 if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
2611 raise ExtractorError('This video is DRM protected.', expected=True)
2612
2613 self._sort_formats(formats)
2614
2615 self.mark_watched(video_id, video_info, player_response)
2616
2617 return {
2618 'id': video_id,
2619 'uploader': video_uploader,
2620 'uploader_id': video_uploader_id,
2621 'uploader_url': video_uploader_url,
2622 'channel_id': channel_id,
2623 'channel_url': channel_url,
2624 'upload_date': upload_date,
2625 'license': video_license,
2626 'creator': video_creator or artist,
2627 'title': video_title,
2628 'alt_title': video_alt_title or track,
2629 'thumbnails': thumbnails,
2630 'description': video_description,
2631 'categories': video_categories,
2632 'tags': video_tags,
2633 'subtitles': video_subtitles,
2634 'automatic_captions': automatic_captions,
2635 'duration': video_duration,
2636 'age_limit': 18 if age_gate else 0,
2637 'annotations': video_annotations,
2638 'chapters': chapters,
2639 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
2640 'view_count': view_count,
2641 'like_count': like_count,
2642 'dislike_count': dislike_count,
2643 'average_rating': average_rating,
2644 'formats': formats,
2645 'is_live': is_live,
2646 'start_time': start_time,
2647 'end_time': end_time,
2648 'series': series,
2649 'season_number': season_number,
2650 'episode_number': episode_number,
2651 'track': track,
2652 'artist': artist,
2653 'album': album,
2654 'release_date': release_date,
2655 'release_year': release_year,
2656 'subscriber_count': subscriber_count,
2657 }
2658
2659
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # Verbose pattern with two top-level alternatives:
    #  * a recognised youtube/youtubekids/invidious/youtu.be playlist-style URL
    #    (playlist id captured in group 1), or
    #  * a bare playlist id on its own (captured in group 2).
    _VALID_URL = r"""(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /
                            (?:
                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                               \? (?:.*?[&;])*? (?:p|a|list)=
                            |  p/
                            )|
                            youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                        )
                        (
                            (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                            # Top tracks, they can also include dots
                            |(?:MC)[\w\.]*
                        )
                        .*
                     |
                        (%(playlist_id)s)
                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
    IE_NAME = 'youtube:playlist'
    # Playlist ids with this prefix belong to Youtube Music; they are extracted
    # as regular playlists but get the fixed uploader info below.
    _YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
    _YTM_CHANNEL_INFO = {
        'uploader': 'Youtube Music',
        'uploader_id': 'music',  # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
        'uploader_url': 'https://www.youtube.com/music'
    }
    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'ChRiStIaAn008',
        },
        'playlist_count': 96,
    }, {
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'Wickydoo',
        },
        'playlist_mincount': 26,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'Cauchemar89',
        },
        'playlist_mincount': 799,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 485,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'sdragonfang',
        }
    }, {
        'note': 'Embedded SWF player',
        'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA7',
            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
        },
        'skip': 'This playlist does not exist',
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'InterstellarMovie1',
        },
        'playlist_mincount': 21,
    }, {
        # Playlist URL that does not actually serve a playlist
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        # https://github.com/ytdl-org/youtube-dl/issues/21844
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'Computerphile',
            'uploader': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }, {
        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }]

    def _real_initialize(self):
        """Log in (if credentials are configured) before any extraction."""
        self._login()

    def extract_videos_from_page(self, page):
        """Collect (video_id, title) pairs from a playlist webpage.

        Tries the modern data-video-id markup first, then falls back to
        progressively more relaxed href-based regexes.  Returns an iterator
        of (id, title) tuples; titles may be None for the relaxed fallbacks.
        """
        ids_in_page = []
        titles_in_page = []

        for item in re.findall(
                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
            attrs = extract_attributes(item)
            video_id = attrs['data-video-id']
            video_title = unescapeHTML(attrs.get('data-title'))
            if video_title:
                video_title = video_title.strip()
            ids_in_page.append(video_id)
            titles_in_page.append(video_title)

        # Fallback with old _VIDEO_RE
        self.extract_videos_from_page_impl(
            self._VIDEO_RE, page, ids_in_page, titles_in_page)

        # Relaxed fallbacks
        self.extract_videos_from_page_impl(
            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)
        self.extract_videos_from_page_impl(
            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
            ids_in_page, titles_in_page)

        return zip(ids_in_page, titles_in_page)

    def _extract_mix_ids_from_yt_initial(self, yt_initial):
        """Pull video ids out of the embedded ytInitialData JSON of a mix page."""
        ids = []
        playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
        if playlist_contents:
            for item in playlist_contents:
                videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
                if videoId:
                    ids.append(videoId)
        return ids

    def _extract_mix(self, playlist_id):
        """Extract an auto-generated mix (id 'RD' + seed video id).

        Mixes have no dedicated playlist page; the video list is scraped from
        successive watch pages, re-seeding each request with the last id seen,
        until no new ids appear.
        """
        # The mixes are generated from a single video
        # the id of the playlist is just 'RD' + video_id
        ids = []
        yt_initial = None
        last_id = playlist_id[-11:]
        for n in itertools.count(1):
            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
            webpage = self._download_webpage(
                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
            new_ids = orderedSet(re.findall(
                r'''(?xs)data-video-username=".*?".*?
                           href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                webpage))

            # if no ids in html of page, try using embedded json
            if (len(new_ids) == 0):
                yt_initial = self._get_yt_initial_data(playlist_id, webpage)
                if yt_initial:
                    new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)

            # Fetch new pages until all the videos are repeated, it seems that
            # there are always 51 unique videos.
            new_ids = [_id for _id in new_ids if _id not in ids]
            if not new_ids:
                break
            ids.extend(new_ids)
            last_id = ids[-1]

        url_results = self._ids_to_results(ids)

        # Try several title containers in decreasing order of specificity.
        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
        title_span = (
            search_title('playlist-title')
            or search_title('title long-title')
            or search_title('title'))
        title = clean_html(title_span)

        if not title:
            title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)

        return self.playlist_result(url_results, playlist_id, title)

    def _extract_playlist(self, playlist_id):
        """Extract a regular playlist page.

        Returns (has_videos, playlist_result); has_videos is False when the
        URL matched a playlist but the page yields no entries.
        """
        url = self._TEMPLATE_URL % playlist_id
        page = self._download_webpage(url, playlist_id)

        # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
        for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
            match = match.strip()
            # Check if the playlist exists or is private
            mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
            if mobj:
                reason = mobj.group('reason')
                message = 'This playlist %s' % reason
                if 'private' in reason:
                    message += ', use --username or --netrc to access it'
                message += '.'
                raise ExtractorError(message, expected=True)
            elif re.match(r'[^<]*Invalid parameters[^<]*', match):
                raise ExtractorError(
                    'Invalid parameters. Maybe URL is incorrect.',
                    expected=True)
            elif re.match(r'[^<]*Choose your language[^<]*', match):
                continue
            else:
                self.report_warning('Youtube gives an alert message: ' + match)

        playlist_title = self._html_search_regex(
            r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
            page, 'title', default=None)

        _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
        uploader = self._html_search_regex(
            r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
            page, 'uploader', default=None)
        mobj = re.search(
            r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
            page)
        if mobj:
            uploader_id = mobj.group('uploader_id')
            uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
        else:
            uploader_id = uploader_url = None

        has_videos = True

        if not playlist_title:
            try:
                # Some playlist URLs don't actually serve a playlist (e.g.
                # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
                next(self._entries(page, playlist_id))
            except StopIteration:
                has_videos = False

        playlist = self.playlist_result(
            self._entries(page, playlist_id), playlist_id, playlist_title)
        playlist.update({
            'uploader': uploader,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
        })
        if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
            playlist.update(self._YTM_CHANNEL_INFO)

        return has_videos, playlist

    def _check_download_just_video(self, url, playlist_id):
        """Return (video_id, result) for video-specific URLs.

        result is a single-video url_result when --no-playlist is set,
        otherwise None; (None, None) when the URL carries no video id.
        """
        # Check if it's a video-specific URL
        query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = query_dict.get('v', [None])[0] or self._search_regex(
            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
            'video id', default=None)
        if video_id:
            if self._downloader.params.get('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
            else:
                self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
                return video_id, None
        return None, None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)
        playlist_id = mobj.group(1) or mobj.group(2)

        video_id, video = self._check_download_just_video(url, playlist_id)
        if video:
            return video

        if playlist_id.startswith(('RD', 'UL', 'PU')):
            if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
                # Mixes require a custom extraction process,
                # Youtube Music playlists act like normal playlists (with randomized order)
                return self._extract_mix(playlist_id)

        has_videos, playlist = self._extract_playlist(playlist_id)
        if has_videos or not video_id:
            return playlist

        # Some playlist URLs don't actually serve a playlist (see
        # https://github.com/ytdl-org/youtube-dl/issues/10537).
        # Fallback to plain video extraction if there is a video id
        # along with playlist id.
        return self.url_result(video_id, 'Youtube', video_id=video_id)
3052
3053
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com channels'
    _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
    _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
    IE_NAME = 'youtube:channel'
    _TESTS = [{
        'note': 'paginated channel',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
        'playlist_mincount': 91,
        'info_dict': {
            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'Uploads from lex will',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        }
    }, {
        'note': 'Age restricted channel',
        # from https://www.youtube.com/user/DeusExOfficial
        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
        'playlist_mincount': 64,
        'info_dict': {
            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
            'title': 'Uploads from Deus Ex',
            'uploader': 'Deus Ex',
            'uploader_id': 'DeusExOfficial',
        },
    }, {
        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to the more specific playlists/live extractors when they match.
        return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
                else super(YoutubeChannelIE, cls).suitable(url))

    def _build_template_url(self, url, channel_id):
        """Build the channel videos page URL (overridden by YoutubeUserIE)."""
        return self._TEMPLATE_URL % channel_id

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        url = self._build_template_url(url, channel_id)

        # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
        # Workaround by extracting as a playlist if managed to obtain channel playlist URL
        # otherwise fallback on channel by page extraction
        channel_page = self._download_webpage(
            url + '?view=57', channel_id,
            'Downloading channel page', fatal=False)
        if channel_page is False:
            channel_playlist_id = False
        else:
            channel_playlist_id = self._html_search_meta(
                'channelId', channel_page, 'channel id', default=None)
            if not channel_playlist_id:
                channel_url = self._html_search_meta(
                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
                    channel_page, 'channel url', default=None)
                if channel_url:
                    channel_playlist_id = self._search_regex(
                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
                        channel_url, 'channel id', default=None)
        if channel_playlist_id and channel_playlist_id.startswith('UC'):
            # 'UC...' channel id maps to the 'UU...' uploads playlist
            playlist_id = 'UU' + channel_playlist_id[2:]
            return self.url_result(
                compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')

        channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
        autogenerated = re.search(r'''(?x)
                class="[^"]*?(?:
                    channel-header-autogenerated-label|
                    yt-channel-title-autogenerated
                )[^"]*"''', channel_page) is not None

        if autogenerated:
            # The videos are contained in a single page
            # the ajax pages can't be used, they are empty
            entries = [
                self.url_result(
                    video_id, 'Youtube', video_id=video_id,
                    video_title=video_title)
                for video_id, video_title in self.extract_videos_from_page(channel_page)]
            return self.playlist_result(entries, channel_id)

        try:
            next(self._entries(channel_page, channel_id))
        except StopIteration:
            # Empty channel page: surface any alert message Youtube shows
            alert_message = self._html_search_regex(
                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
                channel_page, 'alert', default=None, group='alert')
            if alert_message:
                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)

        return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
3153
3154
class YoutubeUserIE(YoutubeChannelIE):
    IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
    IE_NAME = 'youtube:user'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheLinuxFoundation',
        'playlist_mincount': 320,
        'info_dict': {
            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
            'title': 'Uploads from The Linux Foundation',
            'uploader': 'The Linux Foundation',
            'uploader_id': 'TheLinuxFoundation',
        }
    }, {
        # Only available via https://www.youtube.com/c/12minuteathlete/videos
        # but not https://www.youtube.com/user/12minuteathlete/videos
        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
        'playlist_mincount': 249,
        'info_dict': {
            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
            'title': 'Uploads from 12 Minute Athlete',
            'uploader': '12 Minute Athlete',
            'uploader_id': 'the12minuteathlete',
        }
    }, {
        'url': 'ytuser:phihag',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/gametrailers',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/gametrailers',
        'only_matching': True,
    }, {
        # This channel is not available, geo restricted to JP
        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match only when no other Youtube extractor claims the URL.

        _VALID_URL here is very permissive, so every other Youtube*IE in the
        module gets the first chance to handle the URL.
        """
        for name, klass in globals().items():
            if klass is cls or not (name.startswith('Youtube') and name.endswith('IE')):
                continue
            if klass.suitable(url):
                return False
        return super(YoutubeUserIE, cls).suitable(url)

    def _build_template_url(self, url, channel_id):
        """Build the videos page URL, keeping the original /user/ vs /c/ prefix."""
        mobj = re.match(self._VALID_URL, url)
        prefix = mobj.group('user') or 'user'
        return self._TEMPLATE_URL % (prefix, mobj.group('id'))
3212
3213
class YoutubeLiveIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com live streams'
    _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
    IE_NAME = 'youtube:live'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a /live URL to the current live video, else to the channel."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        base_url = mobj.group('base_url')
        webpage = self._download_webpage(url, channel_id, fatal=False)
        if not webpage:
            # Page unavailable: hand the base channel/user URL back to the router
            return self.url_result(base_url)
        page_type = self._og_search_property(
            'type', webpage, 'page type', default='')
        video_id = self._html_search_meta(
            'videoId', webpage, 'video id', default=None)
        looks_like_video = (
            page_type.startswith('video')
            and video_id is not None
            and re.match(r'^[0-9A-Za-z_-]{11}$', video_id))
        if looks_like_video:
            return self.url_result(video_id, YoutubeIE.ie_key())
        return self.url_result(base_url)
3264
3265
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
    # Pure configuration class: all extraction logic lives in the base class.
    IE_DESC = 'YouTube.com user/channel playlists'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
    IE_NAME = 'youtube:playlists'

    _TESTS = [{
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'ThirstForScience',
            'title': 'ThirstForScience',
        },
    }, {
        # with "Load more" button
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 70,
        'info_dict': {
            'id': 'igorkle1',
            'title': 'Игорь Клейнер',
        },
    }, {
        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
        'playlist_mincount': 17,
        'info_dict': {
            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
            'title': 'Chem Player',
        },
        'skip': 'Blocked',
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }]
3298
3299
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistBaseInfoExtractor):
    IE_DESC = 'YouTube.com searches'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra 'params' value sent with the search request (set by subclasses,
    # e.g. YoutubeSearchDateIE uses it to sort by upload date)
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n url_transparent results for query.

        Pages through the youtubei/v1/search API, carrying the continuation
        token between requests; stops early when a page fails to download or
        yields no parseable contents.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # First page and continuation pages nest the section list
            # differently; try both shapes.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                # Skip non-video renderers (channels, playlists, shelves, ...)
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            # Continuation token for the next page lives in the second
            # section-list entry; without it there are no further pages.
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
3388
3389
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant that orders results newest-first (``ytsearchdate``)."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # URL-encoded filter ('CAI=') passed as the innertube 'params' value.
    # NOTE(review): presumably a base64 protobuf selecting upload-date sort — confirm
    _SEARCH_PARAMS = 'CAI%3D'
3395
3396
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
    """Extracts video results from a youtube.com/results search URL."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = 'youtube:search_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _process_json_dict(self, obj, videos, c):
        """Collect video renderers into *videos*; stash continuation data in *c*."""
        if "videoId" in obj:
            videos.append(obj)
            return
        if "nextContinuationData" in obj:
            c["continuation"] = obj["nextContinuationData"]

    def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
        """Append (id, title) data found in *page* to the two in/out lists."""
        initial_data = self._parse_json(
            self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)

        for renderer in self._find_videos_in_json(initial_data):
            video_id = try_get(renderer, lambda x: x['videoId'])
            video_title = (
                try_get(renderer, lambda x: x['title']['runs'][0]['text'])
                or try_get(renderer, lambda x: x['title']['simpleText']))

            # Not a videoRenderer, or the title layout changed under us.
            if video_id is None or video_title is None:
                continue

            video_title = video_title.strip()

            if video_id in ids_in_page:
                # Already recorded; backfill a missing title if we got one now.
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            else:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)

    def extract_videos_from_page(self, page):
        """Return an iterable of (video_id, title) pairs found in *page*."""
        ids_in_page, titles_in_page = [], []
        self.extract_videos_from_page_impl(page, ids_in_page, titles_in_page)
        return zip(ids_in_page, titles_in_page)

    def _real_extract(self, url):
        query = compat_urllib_parse_unquote_plus(
            re.match(self._VALID_URL, url).group('query'))
        webpage = self._download_webpage(url, query)
        return self.playlist_result(
            self._entries(webpage, query, n=5), playlist_title=query)
3456
3457
class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
    """Extracts a show page by delegating to its per-season playlists page."""
    IE_DESC = 'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
    IE_NAME = 'youtube:show'
    _TESTS = [{
        'url': 'https://www.youtube.com/show/airdisasters',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'airdisasters',
            'title': 'Air Disasters',
        }
    }]

    def _real_extract(self, url):
        # A show is just a container of playlists; reuse the playlists extractor.
        show_id = self._match_id(url)
        playlists_url = 'https://www.youtube.com/show/%s/playlists' % show_id
        return super(YoutubeShowIE, self)._real_extract(playlists_url)
3475
3476
class YoutubeFeedsInfoExtractor(YoutubePlaylistBaseInfoExtractor):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # e.g. 'youtube:recommended'
        return 'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are per-account, so authentication is mandatory.
        self._login()

    def _process_entries(self, entries, seen):
        """Yield url results for entries whose videoId is not yet in *seen*.

        *seen* is a list of previously accepted entry dicts; it is extended
        in place with the newly accepted entries, so successive calls (one
        per feed page) act as a de-duplicating filter. Yields nothing when
        every entry was already seen (the caller's stop condition).
        """
        # Build the set of known ids once instead of rescanning *seen* for
        # every entry (the previous nested loop was O(len(entries)*len(seen))).
        # *seen* does not change while filtering, so a one-shot set is safe.
        seen_ids = set(old['videoId'] for old in seen)

        new_info = []
        for v in entries:
            v_id = try_get(v, lambda x: x['videoId'])
            if not v_id:
                # entry without an id cannot be extracted; skip it
                continue
            if v_id not in seen_ids:
                new_info.append(v)

        if not new_info:
            return

        seen.extend(new_info)
        for video in new_info:
            yield self.url_result(
                try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(),
                video_title=self._extract_title(video))

    def _real_extract(self, url):
        # The URL itself is ignored; the feed name determines the page.
        page = self._download_webpage(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            self._PLAYLIST_TITLE)
        return self.playlist_result(self._entries(page, self._PLAYLIST_TITLE),
                                    playlist_title=self._PLAYLIST_TITLE)
3520
3521
class YoutubeWatchLaterIE(YoutubePlaylistIE):
    """Extracts the authenticated user's Watch Later list (playlist 'WL')."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'

    _TESTS = [{
        'url': 'https://www.youtube.com/playlist?list=WL',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch URL with list=WL may really mean "just this video".
        video = self._check_download_just_video(url, 'WL')[1]
        if video:
            return video
        # Otherwise extract the whole Watch Later playlist.
        return self._extract_playlist('WL')[1]
3541
3542
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the user's favourites page to its backing playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds a regular playlist id; scrape and delegate.
        page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(
            r'list=(.+?)["&]', page, 'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')
3553
3554
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed of videos YouTube recommends to the logged-in account."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = 'Youtube Recommended videos'
3560
3561
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed of new videos from the account's subscribed channels."""
    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = 'Youtube Subscriptions'
3567
3568
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed of the account's watch history."""
    IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
    _FEED_NAME = 'history'
    _PLAYLIST_TITLE = 'Youtube History'
3574
3575
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch URLs that lost their v= parameter (typically because an
    unquoted '&' split the command line) and raises a helpful error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # No video id survived in the URL, so extraction is impossible;
        # fail with advice on the most likely cause (shell quoting).
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
3623
3624
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches a watch URL whose video id is shorter than the full 11 chars."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The id is too short to be valid, so the URL must have been cut off.
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)