]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/YoutubeDL.py
Remove support for obsolete python versions
[yt-dlp.git] / yt_dlp / YoutubeDL.py
... / ...
CommitLineData
1#!/usr/bin/env python
2# coding: utf-8
3
4from __future__ import absolute_import, unicode_literals
5
6import collections
7import contextlib
8import copy
9import datetime
10import errno
11import fileinput
12import io
13import itertools
14import json
15import locale
16import operator
17import os
18import platform
19import re
20import shutil
21import subprocess
22import sys
23import time
24import tokenize
25import traceback
26import random
27
28from string import ascii_letters
29from zipimport import zipimporter
30
31from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_str,
39 compat_tokenize_tokenize,
40 compat_urllib_error,
41 compat_urllib_request,
42 compat_urllib_request_DataHandler,
43)
44from .utils import (
45 age_restricted,
46 args_to_str,
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
50 DEFAULT_OUTTMPL,
51 determine_ext,
52 determine_protocol,
53 DOT_DESKTOP_LINK_TEMPLATE,
54 DOT_URL_LINK_TEMPLATE,
55 DOT_WEBLOC_LINK_TEMPLATE,
56 DownloadError,
57 encode_compat_str,
58 encodeFilename,
59 EntryNotInPlaylist,
60 error_to_compat_str,
61 ExistingVideoReached,
62 expand_path,
63 ExtractorError,
64 float_or_none,
65 format_bytes,
66 format_field,
67 FORMAT_RE,
68 formatSeconds,
69 GeoRestrictedError,
70 int_or_none,
71 iri_to_uri,
72 ISO3166Utils,
73 LazyList,
74 locked_file,
75 make_dir,
76 make_HTTPS_handler,
77 MaxDownloadsReached,
78 network_exceptions,
79 orderedSet,
80 OUTTMPL_TYPES,
81 PagedList,
82 parse_filesize,
83 PerRequestProxyHandler,
84 platform_name,
85 PostProcessingError,
86 preferredencoding,
87 prepend_extension,
88 process_communicate_or_kill,
89 random_uuidv4,
90 register_socks_protocols,
91 RejectedVideoReached,
92 render_table,
93 replace_extension,
94 SameFileError,
95 sanitize_filename,
96 sanitize_path,
97 sanitize_url,
98 sanitized_Request,
99 std_headers,
100 str_or_none,
101 strftime_or_none,
102 subtitles_filename,
103 to_high_limit_path,
104 traverse_dict,
105 UnavailableVideoError,
106 url_basename,
107 version_tuple,
108 write_json_file,
109 write_string,
110 YoutubeDLCookieJar,
111 YoutubeDLCookieProcessor,
112 YoutubeDLHandler,
113 YoutubeDLRedirectHandler,
114)
115from .cache import Cache
116from .extractor import (
117 gen_extractor_classes,
118 get_info_extractor,
119 _LAZY_LOADER,
120 _PLUGIN_CLASSES
121)
122from .extractor.openload import PhantomJSwrapper
123from .downloader import (
124 get_suitable_downloader,
125 shorten_protocol_name
126)
127from .downloader.rtmp import rtmpdump_version
128from .postprocessor import (
129 FFmpegFixupM3u8PP,
130 FFmpegFixupM4aPP,
131 FFmpegFixupStretchedPP,
132 FFmpegMergerPP,
133 FFmpegPostProcessor,
134 # FFmpegSubtitlesConvertorPP,
135 get_postprocessor,
136 MoveFilesAfterDownloadPP,
137)
138from .version import __version__
139
140if compat_os_name == 'nt':
141 import ctypes
142
143
144class YoutubeDL(object):
145 """YoutubeDL class.
146
147 YoutubeDL objects are the ones responsible of downloading the
148 actual video file and writing it to disk if the user has requested
149 it, among some other tasks. In most cases there should be one per
150 program. As, given a video URL, the downloader doesn't know how to
151 extract all the needed information, task that InfoExtractors do, it
152 has to pass the URL to one of them.
153
154 For this, YoutubeDL objects have a method that allows
155 InfoExtractors to be registered in a given order. When it is passed
156 a URL, the YoutubeDL object handles it to the first InfoExtractor it
157 finds that reports being able to handle it. The InfoExtractor extracts
158 all the information about the video or videos the URL refers to, and
159 YoutubeDL process the extracted information, possibly using a File
160 Downloader to download the video.
161
162 YoutubeDL objects accept a lot of parameters. In order not to saturate
163 the object constructor with arguments, it receives a dictionary of
164 options instead. These options are available through the params
165 attribute for the InfoExtractors to use. The YoutubeDL also
166 registers itself as the downloader in charge for the InfoExtractors
167 that are added to it, so this is a "mutual registration".
168
169 Available options:
170
171 username: Username for authentication purposes.
172 password: Password for authentication purposes.
173 videopassword: Password for accessing a video.
174 ap_mso: Adobe Pass multiple-system operator identifier.
175 ap_username: Multiple-system operator account username.
176 ap_password: Multiple-system operator account password.
177 usenetrc: Use netrc for authentication instead.
178 verbose: Print additional info to stdout.
179 quiet: Do not print messages to stdout.
180 no_warnings: Do not print out anything for warnings.
181 forceprint: A list of templates to force print
182 forceurl: Force printing final URL. (Deprecated)
183 forcetitle: Force printing title. (Deprecated)
184 forceid: Force printing ID. (Deprecated)
185 forcethumbnail: Force printing thumbnail URL. (Deprecated)
186 forcedescription: Force printing description. (Deprecated)
187 forcefilename: Force printing final filename. (Deprecated)
188 forceduration: Force printing duration. (Deprecated)
189 forcejson: Force printing info_dict as JSON.
190 dump_single_json: Force printing the info_dict of the whole playlist
191 (or video) as a single JSON line.
192 force_write_download_archive: Force writing download archive regardless
193 of 'skip_download' or 'simulate'.
194 simulate: Do not download the video files.
195 format: Video format code. see "FORMAT SELECTION" for more details.
196 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
198 extracting metadata even if the video is not actually
199 available for download (experimental)
200 format_sort: How to sort the video formats. see "Sorting Formats"
201 for more details.
202 format_sort_force: Force the given format_sort. see "Sorting Formats"
203 for more details.
204 allow_multiple_video_streams: Allow multiple video streams to be merged
205 into a single file
206 allow_multiple_audio_streams: Allow multiple audio streams to be merged
207 into a single file
208 paths: Dictionary of output paths. The allowed keys are 'home'
209 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
210 outtmpl: Dictionary of templates for output names. Allowed keys
211 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       A string is also accepted for backward compatibility
213 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
214 restrictfilenames: Do not allow "&" and spaces in file names
215 trim_file_name: Limit length of filename (extension excluded)
216 windowsfilenames: Force the filenames to be windows compatible
217 ignoreerrors: Do not stop on download errors
218 (Default True when running yt-dlp,
219 but False when directly accessing YoutubeDL class)
220 skip_playlist_after_errors: Number of allowed failures until the rest of
221 the playlist is skipped
222 force_generic_extractor: Force downloader to use the generic extractor
223 overwrites: Overwrite all video and metadata files if True,
224 overwrite only non-video files if None
225 and don't overwrite any file if False
226 playliststart: Playlist item to start at.
227 playlistend: Playlist item to end at.
228 playlist_items: Specific indices of playlist to download.
229 playlistreverse: Download playlist items in reverse order.
230 playlistrandom: Download playlist items in random order.
231 matchtitle: Download only matching titles.
232 rejecttitle: Reject downloads for matching titles.
233 logger: Log messages to a logging.Logger instance.
234 logtostderr: Log messages to stderr instead of stdout.
235 writedescription: Write the video description to a .description file
236 writeinfojson: Write the video description to a .info.json file
237 clean_infojson: Remove private fields from the infojson
238 writecomments: Extract video comments. This will not be written to disk
239 unless writeinfojson is also given
240 writeannotations: Write the video annotations to a .annotations.xml file
241 writethumbnail: Write the thumbnail image to a file
242 allow_playlist_files: Whether to write playlists' description, infojson etc
243 also to disk when using the 'write*' options
244 write_all_thumbnails: Write all thumbnail formats to files
245 writelink: Write an internet shortcut file, depending on the
246 current platform (.url/.webloc/.desktop)
247 writeurllink: Write a Windows internet shortcut file (.url)
248 writewebloclink: Write a macOS internet shortcut file (.webloc)
249 writedesktoplink: Write a Linux internet shortcut file (.desktop)
250 writesubtitles: Write the video subtitles to a file
251 writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
253 Downloads all the subtitles of the video
254 (requires writesubtitles or writeautomaticsub)
255 listsubtitles: Lists all available subtitles for the video
256 subtitlesformat: The format code for subtitles
257 subtitleslangs: List of languages of the subtitles to download (can be regex).
258 The list may contain "all" to refer to all the available
259 subtitles. The language can be prefixed with a "-" to
260 exclude it from the requested languages. Eg: ['all', '-live_chat']
261 keepvideo: Keep the video file after post-processing
262 daterange: A DateRange object, download only if the upload_date is in the range.
263 skip_download: Skip the actual download of the video file
264 cachedir: Location of the cache files in the filesystem.
265 False to disable filesystem cache.
266 noplaylist: Download single video instead of a playlist if in doubt.
267 age_limit: An integer representing the user's age in years.
268 Unsuitable videos for the given age are skipped.
269 min_views: An integer representing the minimum view count the video
270 must have in order to not be skipped.
271 Videos without view count information are always
272 downloaded. None for no limit.
273 max_views: An integer representing the maximum view count.
274 Videos that are more popular than that are not
275 downloaded.
276 Videos without view count information are always
277 downloaded. None for no limit.
278 download_archive: File name of a file where all downloads are recorded.
279 Videos already present in the file are not downloaded
280 again.
281 break_on_existing: Stop the download process after attempting to download a
282 file that is in the archive.
283 break_on_reject: Stop the download process when encountering a video that
284 has been filtered out.
285 cookiefile: File name where cookies should be read from and dumped to
286 nocheckcertificate:Do not verify SSL certificates
287 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
288 At the moment, this is only supported by YouTube.
289 proxy: URL of the proxy server to use
290 geo_verification_proxy: URL of the proxy to use for IP address verification
291 on geo-restricted sites.
292 socket_timeout: Time to wait for unresponsive hosts, in seconds
293 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
295 debug_printtraffic:Print out sent and received HTTP traffic
296 include_ads: Download ads as well
297 default_search: Prepend this string if an input url is not valid.
298 'auto' for elaborate guessing
299 encoding: Use this encoding instead of the system-specified.
300 extract_flat: Do not resolve URLs, return the immediate result.
301 Pass in 'in_playlist' to only show this behavior for
302 playlist items.
303 postprocessors: A list of dictionaries, each with an entry
304 * key: The name of the postprocessor. See
305 yt_dlp/postprocessor/__init__.py for a list.
306 * when: When to run the postprocessor. Can be one of
307 pre_process|before_dl|post_process|after_move.
308 Assumed to be 'post_process' if not given
309 post_hooks: A list of functions that get called as the final step
310 for each video file, after all postprocessors have been
311 called. The filename will be passed as the only argument.
312 progress_hooks: A list of functions that get called on download
313 progress, with a dictionary with the entries
314 * status: One of "downloading", "error", or "finished".
315 Check this first and ignore unknown values.
316
317 If status is one of "downloading", or "finished", the
318 following properties may also be present:
319 * filename: The final filename (always present)
320 * tmpfilename: The filename we're currently writing to
321 * downloaded_bytes: Bytes on disk
322 * total_bytes: Size of the whole file, None if unknown
323 * total_bytes_estimate: Guess of the eventual file size,
324 None if unavailable.
325 * elapsed: The number of seconds since download started.
326 * eta: The estimated time in seconds, None if unknown
327 * speed: The download speed in bytes/second, None if
328 unknown
329 * fragment_index: The counter of the currently
330 downloaded video fragment.
331 * fragment_count: The number of fragments (= individual
332 files that will be merged)
333
334 Progress hooks are guaranteed to be called at least once
335 (with status "finished") if the download is successful.
336 merge_output_format: Extension to use when merging formats.
337 final_ext: Expected final extension; used to detect when the file was
338 already downloaded and converted. "merge_output_format" is
339 replaced by this extension when given
340 fixup: Automatically correct known faults of the file.
341 One of:
342 - "never": do nothing
343 - "warn": only emit a warning
344 - "detect_or_warn": check whether we can do anything
345 about it, warn otherwise (default)
346 source_address: Client-side IP address to bind to.
347 call_home: Boolean, true iff we are allowed to contact the
348 yt-dlp servers for debugging. (BROKEN)
349 sleep_interval_requests: Number of seconds to sleep between requests
350 during extraction
351 sleep_interval: Number of seconds to sleep before each download when
352 used alone or a lower bound of a range for randomized
353 sleep before each download (minimum possible number
354 of seconds to sleep) when used along with
355 max_sleep_interval.
356 max_sleep_interval:Upper bound of a range for randomized sleep before each
357 download (maximum possible number of seconds to sleep).
358 Must only be used along with sleep_interval.
359 Actual sleep time will be a random float from range
360 [sleep_interval; max_sleep_interval].
361 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
362 listformats: Print an overview of available video formats and exit.
363 list_thumbnails: Print a table of all thumbnails and exit.
364 match_filter: A function that gets called with the info_dict of
365 every video.
366 If it returns a message, the video is ignored.
367 If it returns None, the video is downloaded.
368 match_filter_func in utils.py is one example for this.
369 no_color: Do not emit color codes in output.
370 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
371 HTTP header
372 geo_bypass_country:
373 Two-letter ISO 3166-2 country code that will be used for
374 explicit geographic restriction bypassing via faking
375 X-Forwarded-For HTTP header
376 geo_bypass_ip_block:
377 IP range in CIDR notation that will be used similarly to
378 geo_bypass_country
379
380 The following options determine which downloader is picked:
381 external_downloader: A dictionary of protocol keys and the executable of the
382 external downloader to use for it. The allowed protocols
383 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
384 Set the value to 'native' to use the native downloader
385 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
386 or {'m3u8': 'ffmpeg'} instead.
387 Use the native HLS downloader instead of ffmpeg/avconv
388 if True, otherwise use ffmpeg/avconv if False, otherwise
389 use downloader suggested by extractor if None.
390 compat_opts: Compatibility options. See "Differences in default behavior".
391 Note that only format-sort, format-spec, no-live-chat,
392 no-attach-info-json, playlist-index, list-formats,
393 no-direct-merge, no-youtube-channel-redirect,
394 and no-youtube-unavailable-videos works when used via the API
395
396 The following parameters are not used by YoutubeDL itself, they are used by
397 the downloader (see yt_dlp/downloader/common.py):
398 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
399 noresizebuffer, retries, continuedl, noprogress, consoletitle,
400 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
401 http_chunk_size.
402
403 The following options are used by the post processors:
404 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
405 otherwise prefer ffmpeg. (avconv support is deprecated)
406 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
407 to the binary or its containing directory.
408 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
409 and a list of additional command-line arguments for the
410 postprocessor/executable. The dict can also have "PP+EXE" keys
411 which are used when the given exe is used by the given PP.
412 Use 'default' as the name for arguments to passed to all PP
413
414 The following options are used by the extractors:
415 extractor_retries: Number of times to retry for known errors
416 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
417 hls_split_discontinuity: Split HLS playlists to different formats at
418 discontinuities such as ad breaks (default: False)
419 youtube_include_dash_manifest: If True (default), DASH manifests and related
420 data will be downloaded and processed by extractor.
421 You can reduce network I/O by disabling it if you don't
422 care about DASH. (only for youtube)
423 youtube_include_hls_manifest: If True (default), HLS manifests and related
424 data will be downloaded and processed by extractor.
425 You can reduce network I/O by disabling it if you don't
426 care about HLS. (only for youtube)
427 """
428
429 _NUMERIC_FIELDS = set((
430 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
431 'timestamp', 'upload_year', 'upload_month', 'upload_day',
432 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
433 'average_rating', 'comment_count', 'age_limit',
434 'start_time', 'end_time',
435 'chapter_number', 'season_number', 'episode_number',
436 'track_number', 'disc_number', 'release_year',
437 'playlist_index',
438 ))
439
440 params = None
441 _ies = []
442 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
443 __prepare_filename_warned = False
444 _first_webpage_request = True
445 _download_retcode = None
446 _num_downloads = None
447 _playlist_level = 0
448 _playlist_urls = set()
449 _screen_file = None
450
451 def __init__(self, params=None, auto_init=True):
452 """Create a FileDownloader object with the given options."""
453 if params is None:
454 params = {}
455 self._ies = []
456 self._ies_instances = {}
457 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
458 self.__prepare_filename_warned = False
459 self._first_webpage_request = True
460 self._post_hooks = []
461 self._progress_hooks = []
462 self._download_retcode = 0
463 self._num_downloads = 0
464 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
465 self._err_file = sys.stderr
466 self.params = {
467 # Default parameters
468 'nocheckcertificate': False,
469 }
470 self.params.update(params)
471 self.cache = Cache(self)
472
473 if sys.version_info < (3, 6):
474 self.report_warning(
475 'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
476 'Update to Python 3.6 or above' % sys.version_info[:2])
477
478 def check_deprecated(param, option, suggestion):
479 if self.params.get(param) is not None:
480 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
481 return True
482 return False
483
484 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
485 if self.params.get('geo_verification_proxy') is None:
486 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
487
488 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
489 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
490 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
491
492 for msg in self.params.get('warnings', []):
493 self.report_warning(msg)
494
495 if self.params.get('final_ext'):
496 if self.params.get('merge_output_format'):
497 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
498 self.params['merge_output_format'] = self.params['final_ext']
499
500 if 'overwrites' in self.params and self.params['overwrites'] is None:
501 del self.params['overwrites']
502
503 if params.get('bidi_workaround', False):
504 try:
505 import pty
506 master, slave = pty.openpty()
507 width = compat_get_terminal_size().columns
508 if width is None:
509 width_args = []
510 else:
511 width_args = ['-w', str(width)]
512 sp_kwargs = dict(
513 stdin=subprocess.PIPE,
514 stdout=slave,
515 stderr=self._err_file)
516 try:
517 self._output_process = subprocess.Popen(
518 ['bidiv'] + width_args, **sp_kwargs
519 )
520 except OSError:
521 self._output_process = subprocess.Popen(
522 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
523 self._output_channel = os.fdopen(master, 'rb')
524 except OSError as ose:
525 if ose.errno == errno.ENOENT:
526 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
527 else:
528 raise
529
530 if (sys.platform != 'win32'
531 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
532 and not params.get('restrictfilenames', False)):
533 # Unicode filesystem API will throw errors (#1474, #13027)
534 self.report_warning(
535 'Assuming --restrict-filenames since file system encoding '
536 'cannot encode all characters. '
537 'Set the LC_ALL environment variable to fix this.')
538 self.params['restrictfilenames'] = True
539
540 self.outtmpl_dict = self.parse_outtmpl()
541
542 self._setup_opener()
543
544 """Preload the archive, if any is specified"""
545 def preload_download_archive(fn):
546 if fn is None:
547 return False
548 self.write_debug('Loading archive file %r\n' % fn)
549 try:
550 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
551 for line in archive_file:
552 self.archive.add(line.strip())
553 except IOError as ioe:
554 if ioe.errno != errno.ENOENT:
555 raise
556 return False
557 return True
558
559 self.archive = set()
560 preload_download_archive(self.params.get('download_archive'))
561
562 if auto_init:
563 self.print_debug_header()
564 self.add_default_info_extractors()
565
566 for pp_def_raw in self.params.get('postprocessors', []):
567 pp_class = get_postprocessor(pp_def_raw['key'])
568 pp_def = dict(pp_def_raw)
569 del pp_def['key']
570 if 'when' in pp_def:
571 when = pp_def['when']
572 del pp_def['when']
573 else:
574 when = 'post_process'
575 pp = pp_class(self, **compat_kwargs(pp_def))
576 self.add_post_processor(pp, when=when)
577
578 for ph in self.params.get('post_hooks', []):
579 self.add_post_hook(ph)
580
581 for ph in self.params.get('progress_hooks', []):
582 self.add_progress_hook(ph)
583
584 register_socks_protocols()
585
586 def warn_if_short_id(self, argv):
587 # short YouTube ID starting with dash?
588 idxs = [
589 i for i, a in enumerate(argv)
590 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
591 if idxs:
592 correct_argv = (
593 ['yt-dlp']
594 + [a for i, a in enumerate(argv) if i not in idxs]
595 + ['--'] + [argv[i] for i in idxs]
596 )
597 self.report_warning(
598 'Long argument string detected. '
599 'Use -- to separate parameters and URLs, like this:\n%s\n' %
600 args_to_str(correct_argv))
601
602 def add_info_extractor(self, ie):
603 """Add an InfoExtractor object to the end of the list."""
604 self._ies.append(ie)
605 if not isinstance(ie, type):
606 self._ies_instances[ie.ie_key()] = ie
607 ie.set_downloader(self)
608
609 def get_info_extractor(self, ie_key):
610 """
611 Get an instance of an IE with name ie_key, it will try to get one from
612 the _ies list, if there's no instance it will create a new one and add
613 it to the extractor list.
614 """
615 ie = self._ies_instances.get(ie_key)
616 if ie is None:
617 ie = get_info_extractor(ie_key)()
618 self.add_info_extractor(ie)
619 return ie
620
621 def add_default_info_extractors(self):
622 """
623 Add the InfoExtractors returned by gen_extractors to the end of the list
624 """
625 for ie in gen_extractor_classes():
626 self.add_info_extractor(ie)
627
628 def add_post_processor(self, pp, when='post_process'):
629 """Add a PostProcessor object to the end of the chain."""
630 self._pps[when].append(pp)
631 pp.set_downloader(self)
632
633 def add_post_hook(self, ph):
634 """Add the post hook"""
635 self._post_hooks.append(ph)
636
637 def add_progress_hook(self, ph):
638 """Add the progress hook (currently only for the file downloader)"""
639 self._progress_hooks.append(ph)
640
641 def _bidi_workaround(self, message):
642 if not hasattr(self, '_output_channel'):
643 return message
644
645 assert hasattr(self, '_output_process')
646 assert isinstance(message, compat_str)
647 line_count = message.count('\n') + 1
648 self._output_process.stdin.write((message + '\n').encode('utf-8'))
649 self._output_process.stdin.flush()
650 res = ''.join(self._output_channel.readline().decode('utf-8')
651 for _ in range(line_count))
652 return res[:-len('\n')]
653
654 def _write_string(self, s, out=None):
655 write_string(s, out=out, encoding=self.params.get('encoding'))
656
657 def to_stdout(self, message, skip_eol=False, quiet=False):
658 """Print message to stdout"""
659 if self.params.get('logger'):
660 self.params['logger'].debug(message)
661 elif not quiet or self.params.get('verbose'):
662 self._write_string(
663 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
664 self._err_file if quiet else self._screen_file)
665
666 def to_stderr(self, message):
667 """Print message to stderr"""
668 assert isinstance(message, compat_str)
669 if self.params.get('logger'):
670 self.params['logger'].error(message)
671 else:
672 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
673
674 def to_console_title(self, message):
675 if not self.params.get('consoletitle', False):
676 return
677 if compat_os_name == 'nt':
678 if ctypes.windll.kernel32.GetConsoleWindow():
679 # c_wchar_p() might not be necessary if `message` is
680 # already of type unicode()
681 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
682 elif 'TERM' in os.environ:
683 self._write_string('\033]0;%s\007' % message, self._screen_file)
684
685 def save_console_title(self):
686 if not self.params.get('consoletitle', False):
687 return
688 if self.params.get('simulate', False):
689 return
690 if compat_os_name != 'nt' and 'TERM' in os.environ:
691 # Save the title on stack
692 self._write_string('\033[22;0t', self._screen_file)
693
694 def restore_console_title(self):
695 if not self.params.get('consoletitle', False):
696 return
697 if self.params.get('simulate', False):
698 return
699 if compat_os_name != 'nt' and 'TERM' in os.environ:
700 # Restore the title from stack
701 self._write_string('\033[23;0t', self._screen_file)
702
703 def __enter__(self):
704 self.save_console_title()
705 return self
706
707 def __exit__(self, *args):
708 self.restore_console_title()
709
710 if self.params.get('cookiefile') is not None:
711 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
712
713 def trouble(self, message=None, tb=None):
714 """Determine action to take when a download problem appears.
715
716 Depending on if the downloader has been configured to ignore
717 download errors or not, this method may throw an exception or
718 not when errors are found, after printing the message.
719
720 tb, if given, is additional traceback information.
721 """
722 if message is not None:
723 self.to_stderr(message)
724 if self.params.get('verbose'):
725 if tb is None:
726 if sys.exc_info()[0]: # if .trouble has been called from an except block
727 tb = ''
728 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
729 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
730 tb += encode_compat_str(traceback.format_exc())
731 else:
732 tb_data = traceback.format_list(traceback.extract_stack())
733 tb = ''.join(tb_data)
734 if tb:
735 self.to_stderr(tb)
736 if not self.params.get('ignoreerrors', False):
737 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
738 exc_info = sys.exc_info()[1].exc_info
739 else:
740 exc_info = sys.exc_info()
741 raise DownloadError(message, exc_info)
742 self._download_retcode = 1
743
744 def to_screen(self, message, skip_eol=False):
745 """Print message to stdout if not in quiet mode"""
746 self.to_stdout(
747 message, skip_eol, quiet=self.params.get('quiet', False))
748
749 def report_warning(self, message):
750 '''
751 Print the message to stderr, it will be prefixed with 'WARNING:'
752 If stderr is a tty file the 'WARNING:' will be colored
753 '''
754 if self.params.get('logger') is not None:
755 self.params['logger'].warning(message)
756 else:
757 if self.params.get('no_warnings'):
758 return
759 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
760 _msg_header = '\033[0;33mWARNING:\033[0m'
761 else:
762 _msg_header = 'WARNING:'
763 warning_message = '%s %s' % (_msg_header, message)
764 self.to_stderr(warning_message)
765
766 def report_error(self, message, tb=None):
767 '''
768 Do the same as trouble, but prefixes the message with 'ERROR:', colored
769 in red if stderr is a tty file.
770 '''
771 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
772 _msg_header = '\033[0;31mERROR:\033[0m'
773 else:
774 _msg_header = 'ERROR:'
775 error_message = '%s %s' % (_msg_header, message)
776 self.trouble(error_message, tb)
777
778 def write_debug(self, message):
779 '''Log debug message or Print message to stderr'''
780 if not self.params.get('verbose', False):
781 return
782 message = '[debug] %s' % message
783 if self.params.get('logger'):
784 self.params['logger'].debug(message)
785 else:
786 self._write_string('%s\n' % message)
787
788 def report_file_already_downloaded(self, file_name):
789 """Report file has already been fully downloaded."""
790 try:
791 self.to_screen('[download] %s has already been downloaded' % file_name)
792 except UnicodeEncodeError:
793 self.to_screen('[download] The file has already been downloaded')
794
795 def report_file_delete(self, file_name):
796 """Report that existing file will be deleted."""
797 try:
798 self.to_screen('Deleting existing file %s' % file_name)
799 except UnicodeEncodeError:
800 self.to_screen('Deleting existing file')
801
802 def parse_outtmpl(self):
803 outtmpl_dict = self.params.get('outtmpl', {})
804 if not isinstance(outtmpl_dict, dict):
805 outtmpl_dict = {'default': outtmpl_dict}
806 outtmpl_dict.update({
807 k: v for k, v in DEFAULT_OUTTMPL.items()
808 if not outtmpl_dict.get(k)})
809 for key, val in outtmpl_dict.items():
810 if isinstance(val, bytes):
811 self.report_warning(
812 'Parameter outtmpl is bytes, but should be a unicode string. '
813 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
814 return outtmpl_dict
815
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

        Computes derived fields (duration_string, epoch, autonumber,
        resolution), evaluates the internal template mini-language
        (field traversal, negation, +/- arithmetic, >strftime, |default)
        for every %(...)X occurrence, and returns the possibly-rewritten
        template together with a defaultdict that substitutes the NA
        placeholder for missing fields.
        """
        template_dict = dict(info_dict)
        na = self.params.get('outtmpl_na_placeholder', 'NA')

        # duration_string
        template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)

        # epoch
        template_dict['epoch'] = int(time.time())

        # autonumber
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads

        # resolution if not defined
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
        mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
        if mobj:
            outtmpl = re.sub(
                FIELD_SIZE_COMPAT_RE,
                r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                outtmpl)

        numeric_fields = list(self._NUMERIC_FIELDS)
        if sanitize is None:
            # Identity sanitizer when the caller did not supply one
            sanitize = lambda k, v: v

        EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{0})
            (?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
            (?:>(?P<strf_format>.+?))?
            (?:\|(?P<default>.*?))?
            $'''.format(FIELD_RE))
        MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
            final_key = outer_mobj.group('key')
            str_type = outer_mobj.group('type')
            value = None
            mobj = re.match(INTERNAL_FORMAT_RE, final_key)
            if mobj is not None:
                mobj = mobj.groupdict()
                # Object traversal
                fields = mobj['fields'].split('.')
                value = traverse_dict(template_dict, fields)
                # Negative
                if mobj['negate']:
                    value = float_or_none(value)
                    if value is not None:
                        value *= -1
                # Do maths
                if mobj['maths']:
                    value = float_or_none(value)
                    # Alternates between operator tokens ('+'/'-') and
                    # operands (numbers or field references)
                    operator = None
                    for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
                        if item == '':
                            value = None
                        if value is None:
                            break
                        if operator:
                            item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                            offset = float_or_none(item)
                            if offset is None:
                                # Not a literal number: resolve as a field path
                                offset = float_or_none(traverse_dict(template_dict, item.split('.')))
                            try:
                                value = operator(value, multiplier * offset)
                            except (TypeError, ZeroDivisionError):
                                value = None
                            operator = None
                        else:
                            operator = MATH_FUNCTIONS[item]
                # Datetime formatting
                if mobj['strf_format']:
                    value = strftime_or_none(value, mobj['strf_format'])
                # Set default
                if value is None and mobj['default'] is not None:
                    value = mobj['default']
            # Sanitize
            if str_type in 'crs' and value is not None:  # string
                value = sanitize('%{}'.format(str_type) % fields[-1], value)
            else:  # numeric
                numeric_fields.append(final_key)
                value = float_or_none(value)
            if value is not None:
                template_dict[final_key] = value

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string NA placeholder is returned for missing fields. We will patch
        # output template for missing fields to meet string presentation type.
        for numeric_field in numeric_fields:
            if template_dict.get(numeric_field) is None:
                outtmpl = re.sub(
                    FORMAT_RE.format(re.escape(numeric_field)),
                    r'%({0})s'.format(numeric_field), outtmpl)

        template_dict = collections.defaultdict(lambda: na, (
            (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
            for k, v in template_dict.items() if v is not None))
        return outtmpl, template_dict
944
    def _prepare_filename(self, info_dict, tmpl_type='default'):
        """Render the output template of the given type against info_dict.

        Returns the substituted filename (optionally trimmed via
        --trim-file-name), or None on a template error, which is reported
        through report_error.
        """
        try:
            # Per-field sanitizer; id-like fields get the gentler is_id
            # treatment in sanitize_filename
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
            outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Force the extension expected for this template type (thumbnails,
            # infojson, etc.)
            force_ext = OUTTMPL_TYPES.get(tmpl_type)
            if force_ext is not None:
                filename = replace_extension(filename, force_ext, template_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    # Keep a secondary extension (e.g. language code) too
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
985
986 def prepare_filename(self, info_dict, dir_type='', warn=False):
987 """Generate the output filename."""
988 paths = self.params.get('paths', {})
989 assert isinstance(paths, dict)
990 filename = self._prepare_filename(info_dict, dir_type or 'default')
991
992 if warn and not self.__prepare_filename_warned:
993 if not paths:
994 pass
995 elif filename == '-':
996 self.report_warning('--paths is ignored when an outputting to stdout')
997 elif os.path.isabs(filename):
998 self.report_warning('--paths is ignored since an absolute path is given in output template')
999 self.__prepare_filename_warned = True
1000 if filename == '-' or not filename:
1001 return filename
1002
1003 homepath = expand_path(paths.get('home', '').strip())
1004 assert isinstance(homepath, compat_str)
1005 subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
1006 assert isinstance(subdir, compat_str)
1007 path = os.path.join(homepath, subdir, filename)
1008
1009 # Temporary fix for #4787
1010 # 'Treat' all problem characters by passing filename through preferredencoding
1011 # to workaround encoding issues with subprocess on python2 @ Windows
1012 if sys.version_info < (3, 0) and sys.platform == 'win32':
1013 path = encodeFilename(path, True).decode(preferredencoding())
1014 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1015
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded

        Otherwise returns a human-readable reason string. Raises
        ExistingVideoReached/RejectedVideoReached when the corresponding
        break_on_* option is set.
        """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a rejection reason, or None to accept the entry
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            # The user-supplied match_filter is only consulted once the
            # metadata is complete
            if not incomplete:
                match_filter = self.params.get('match_filter')
                if match_filter is not None:
                    ret = match_filter(info_dict)
                    if ret is not None:
                        return ret
            return None

        # Archive hits and filter rejections pair with different
        # break-options and control-flow exceptions
        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1069
1070 @staticmethod
1071 def add_extra_info(info_dict, extra_info):
1072 '''Set the keys from extra_info in info dict if they are missing'''
1073 for key, value in extra_info.items():
1074 info_dict.setdefault(key, value)
1075
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        # With an explicit key only that extractor is tried; otherwise all
        # registered extractors are probed in order
        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # Pre-check the download archive using a cheaply-extracted id,
            # so we can skip before running the full (slow) extraction
            try:
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # break (not return): skips the for-else error below and
                # returns None for the archived entry
                break
            return self.__extract_info(url, ie, download, extra_info, process)
        else:
            # No extractor accepted the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1124
    def __handle_extraction_exceptions(func):
        # Decorator for extraction methods: converts expected extraction
        # failures into report_error() calls. On a handled failure the
        # wrapper implicitly returns None, which callers treat as "no
        # result". The except-clause order matters: the control-flow
        # exceptions below must not be swallowed by the generic handler.
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                # Enrich the message with the allowed countries, if known
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                # Control-flow exceptions: must propagate to stop processing
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1146
1147 @__handle_extraction_exceptions
1148 def __extract_info(self, url, ie, download, extra_info, process):
1149 ie_result = ie.extract(url)
1150 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1151 return
1152 if isinstance(ie_result, list):
1153 # Backwards compatibility: old IE result format
1154 ie_result = {
1155 '_type': 'compat_list',
1156 'entries': ie_result,
1157 }
1158 self.add_default_extra_info(ie_result, ie, url)
1159 if process:
1160 return self.process_ie_result(ie_result, download, extra_info)
1161 else:
1162 return ie_result
1163
1164 def add_default_extra_info(self, ie_result, ie, url):
1165 self.add_extra_info(ie_result, {
1166 'extractor': ie.IE_NAME,
1167 'webpage_url': url,
1168 'webpage_url_basename': url_basename(url),
1169 'extractor_key': ie.ie_key(),
1170 })
1171
1172 def process_ie_result(self, ie_result, download=True, extra_info={}):
1173 """
1174 Take the result of the ie(may be modified) and resolve all unresolved
1175 references (URLs, playlist items).
1176
1177 It will also download the videos if 'download'.
1178 Returns the resolved ie_result.
1179 """
1180 result_type = ie_result.get('_type', 'video')
1181
1182 if result_type in ('url', 'url_transparent'):
1183 ie_result['url'] = sanitize_url(ie_result['url'])
1184 extract_flat = self.params.get('extract_flat', False)
1185 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1186 or extract_flat is True):
1187 self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
1188 return ie_result
1189
1190 if result_type == 'video':
1191 self.add_extra_info(ie_result, extra_info)
1192 ie_result = self.process_video_result(ie_result, download=download)
1193 additional_urls = (ie_result or {}).get('additional_urls')
1194 if additional_urls:
1195 # TODO: Improve MetadataFromFieldPP to allow setting a list
1196 if isinstance(additional_urls, compat_str):
1197 additional_urls = [additional_urls]
1198 self.to_screen(
1199 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1200 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1201 ie_result['additional_entries'] = [
1202 self.extract_info(
1203 url, download, extra_info,
1204 force_generic_extractor=self.params.get('force_generic_extractor'))
1205 for url in additional_urls
1206 ]
1207 return ie_result
1208 elif result_type == 'url':
1209 # We have to add extra_info to the results because it may be
1210 # contained in a playlist
1211 return self.extract_info(
1212 ie_result['url'], download,
1213 ie_key=ie_result.get('ie_key'),
1214 extra_info=extra_info)
1215 elif result_type == 'url_transparent':
1216 # Use the information from the embedding page
1217 info = self.extract_info(
1218 ie_result['url'], ie_key=ie_result.get('ie_key'),
1219 extra_info=extra_info, download=False, process=False)
1220
1221 # extract_info may return None when ignoreerrors is enabled and
1222 # extraction failed with an error, don't crash and return early
1223 # in this case
1224 if not info:
1225 return info
1226
1227 force_properties = dict(
1228 (k, v) for k, v in ie_result.items() if v is not None)
1229 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1230 if f in force_properties:
1231 del force_properties[f]
1232 new_result = info.copy()
1233 new_result.update(force_properties)
1234
1235 # Extracted info may not be a video result (i.e.
1236 # info.get('_type', 'video') != video) but rather an url or
1237 # url_transparent. In such cases outer metadata (from ie_result)
1238 # should be propagated to inner one (info). For this to happen
1239 # _type of info should be overridden with url_transparent. This
1240 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1241 if new_result.get('_type') == 'url':
1242 new_result['_type'] = 'url_transparent'
1243
1244 return self.process_ie_result(
1245 new_result, download=download, extra_info=extra_info)
1246 elif result_type in ('playlist', 'multi_video'):
1247 # Protect from infinite recursion due to recursively nested playlists
1248 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1249 webpage_url = ie_result['webpage_url']
1250 if webpage_url in self._playlist_urls:
1251 self.to_screen(
1252 '[download] Skipping already downloaded playlist: %s'
1253 % ie_result.get('title') or ie_result.get('id'))
1254 return
1255
1256 self._playlist_level += 1
1257 self._playlist_urls.add(webpage_url)
1258 self._sanitize_thumbnails(ie_result)
1259 try:
1260 return self.__process_playlist(ie_result, download)
1261 finally:
1262 self._playlist_level -= 1
1263 if not self._playlist_level:
1264 self._playlist_urls.clear()
1265 elif result_type == 'compat_list':
1266 self.report_warning(
1267 'Extractor %s returned a compat_list result. '
1268 'It needs to be updated.' % ie_result.get('extractor'))
1269
1270 def _fixup(r):
1271 self.add_extra_info(
1272 r,
1273 {
1274 'extractor': ie_result['extractor'],
1275 'webpage_url': ie_result['webpage_url'],
1276 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1277 'extractor_key': ie_result['extractor_key'],
1278 }
1279 )
1280 return r
1281 ie_result['entries'] = [
1282 self.process_ie_result(_fixup(r), download, extra_info)
1283 for r in ie_result['entries']
1284 ]
1285 return ie_result
1286 else:
1287 raise Exception('Invalid result type: %s' % result_type)
1288
    def _ensure_dir_exists(self, path):
        """Thin wrapper around make_dir() that routes failures to report_error."""
        return make_dir(path, self.report_error)
1291
1292 def __process_playlist(self, ie_result, download):
1293 # We process each entry in the playlist
1294 playlist = ie_result.get('title') or ie_result.get('id')
1295 self.to_screen('[download] Downloading playlist: %s' % playlist)
1296
1297 if 'entries' not in ie_result:
1298 raise EntryNotInPlaylist()
1299 incomplete_entries = bool(ie_result.get('requested_entries'))
1300 if incomplete_entries:
1301 def fill_missing_entries(entries, indexes):
1302 ret = [None] * max(*indexes)
1303 for i, entry in zip(indexes, entries):
1304 ret[i - 1] = entry
1305 return ret
1306 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1307
1308 playlist_results = []
1309
1310 playliststart = self.params.get('playliststart', 1)
1311 playlistend = self.params.get('playlistend')
1312 # For backwards compatibility, interpret -1 as whole list
1313 if playlistend == -1:
1314 playlistend = None
1315
1316 playlistitems_str = self.params.get('playlist_items')
1317 playlistitems = None
1318 if playlistitems_str is not None:
1319 def iter_playlistitems(format):
1320 for string_segment in format.split(','):
1321 if '-' in string_segment:
1322 start, end = string_segment.split('-')
1323 for item in range(int(start), int(end) + 1):
1324 yield int(item)
1325 else:
1326 yield int(string_segment)
1327 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1328
1329 ie_entries = ie_result['entries']
1330 msg = (
1331 'Downloading %d videos' if not isinstance(ie_entries, list)
1332 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1333 if not isinstance(ie_entries, (list, PagedList)):
1334 ie_entries = LazyList(ie_entries)
1335
1336 entries = []
1337 for i in playlistitems or itertools.count(playliststart):
1338 if playlistitems is None and playlistend is not None and playlistend < i:
1339 break
1340 entry = None
1341 try:
1342 entry = ie_entries[i - 1]
1343 if entry is None:
1344 raise EntryNotInPlaylist()
1345 except (IndexError, EntryNotInPlaylist):
1346 if incomplete_entries:
1347 raise EntryNotInPlaylist()
1348 elif not playlistitems:
1349 break
1350 entries.append(entry)
1351 try:
1352 if entry is not None:
1353 self._match_entry(entry, incomplete=True, silent=True)
1354 except (ExistingVideoReached, RejectedVideoReached):
1355 break
1356 ie_result['entries'] = entries
1357
1358 # Save playlist_index before re-ordering
1359 entries = [
1360 ((playlistitems[i - 1] if playlistitems else i), entry)
1361 for i, entry in enumerate(entries, 1)
1362 if entry is not None]
1363 n_entries = len(entries)
1364
1365 if not playlistitems and (playliststart or playlistend):
1366 playlistitems = list(range(playliststart, playliststart + n_entries))
1367 ie_result['requested_entries'] = playlistitems
1368
1369 if self.params.get('allow_playlist_files', True):
1370 ie_copy = {
1371 'playlist': playlist,
1372 'playlist_id': ie_result.get('id'),
1373 'playlist_title': ie_result.get('title'),
1374 'playlist_uploader': ie_result.get('uploader'),
1375 'playlist_uploader_id': ie_result.get('uploader_id'),
1376 'playlist_index': 0,
1377 }
1378 ie_copy.update(dict(ie_result))
1379
1380 if self.params.get('writeinfojson', False):
1381 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1382 if not self._ensure_dir_exists(encodeFilename(infofn)):
1383 return
1384 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1385 self.to_screen('[info] Playlist metadata is already present')
1386 else:
1387 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1388 try:
1389 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1390 except (OSError, IOError):
1391 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1392
1393 # TODO: This should be passed to ThumbnailsConvertor if necessary
1394 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1395
1396 if self.params.get('writedescription', False):
1397 descfn = self.prepare_filename(ie_copy, 'pl_description')
1398 if not self._ensure_dir_exists(encodeFilename(descfn)):
1399 return
1400 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1401 self.to_screen('[info] Playlist description is already present')
1402 elif ie_result.get('description') is None:
1403 self.report_warning('There\'s no playlist description to write.')
1404 else:
1405 try:
1406 self.to_screen('[info] Writing playlist description to: ' + descfn)
1407 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1408 descfile.write(ie_result['description'])
1409 except (OSError, IOError):
1410 self.report_error('Cannot write playlist description file ' + descfn)
1411 return
1412
1413 if self.params.get('playlistreverse', False):
1414 entries = entries[::-1]
1415 if self.params.get('playlistrandom', False):
1416 random.shuffle(entries)
1417
1418 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1419
1420 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1421 failures = 0
1422 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1423 for i, entry_tuple in enumerate(entries, 1):
1424 playlist_index, entry = entry_tuple
1425 if 'playlist_index' in self.params.get('compat_options', []):
1426 playlist_index = playlistitems[i - 1] if playlistitems else i
1427 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1428 # This __x_forwarded_for_ip thing is a bit ugly but requires
1429 # minimal changes
1430 if x_forwarded_for:
1431 entry['__x_forwarded_for_ip'] = x_forwarded_for
1432 extra = {
1433 'n_entries': n_entries,
1434 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1435 'playlist_index': playlist_index,
1436 'playlist_autonumber': i,
1437 'playlist': playlist,
1438 'playlist_id': ie_result.get('id'),
1439 'playlist_title': ie_result.get('title'),
1440 'playlist_uploader': ie_result.get('uploader'),
1441 'playlist_uploader_id': ie_result.get('uploader_id'),
1442 'extractor': ie_result['extractor'],
1443 'webpage_url': ie_result['webpage_url'],
1444 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1445 'extractor_key': ie_result['extractor_key'],
1446 }
1447
1448 if self._match_entry(entry, incomplete=True) is not None:
1449 continue
1450
1451 entry_result = self.__process_iterable_entry(entry, download, extra)
1452 if not entry_result:
1453 failures += 1
1454 if failures >= max_failures:
1455 self.report_error(
1456 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1457 break
1458 # TODO: skip failed (empty) entries?
1459 playlist_results.append(entry_result)
1460 ie_result['entries'] = playlist_results
1461 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1462 return ie_result
1463
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Resolve a single playlist entry; the decorator converts extraction
        # failures into error reports and a None (falsy) return value
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1468
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "

        # Numeric comparisons, e.g. "height<=720" or "filesize>10M"
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            $
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: try to parse as a filesize, with and
                # without an implicit 'B' suffix (e.g. "10M" -> "10MB")
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        # Fall back to string comparisons, e.g. "ext=mp4" or "vcodec^=avc"
        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>[a-zA-Z0-9._-]+)
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            # '?' after the operator makes formats lacking the key pass
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
1531
1532 def _default_format_spec(self, info_dict, download=True):
1533
1534 def can_merge():
1535 merger = FFmpegMergerPP(self)
1536 return merger.available and merger.can_merge()
1537
1538 prefer_best = (
1539 not self.params.get('simulate', False)
1540 and download
1541 and (
1542 not can_merge()
1543 or info_dict.get('is_live', False)
1544 or self.outtmpl_dict['default'] == '-'))
1545 compat = (
1546 prefer_best
1547 or self.params.get('allow_multiple_audio_streams', False)
1548 or 'format-spec' in self.params.get('compat_opts', []))
1549
1550 return (
1551 'best/bestvideo+bestaudio' if prefer_best
1552 else 'bestvideo*+bestaudio/best' if not compat
1553 else 'bestvideo+bestaudio/best')
1554
1555 def build_format_selector(self, format_spec):
1556 def syntax_error(note, start):
1557 message = (
1558 'Invalid format specification: '
1559 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1560 return SyntaxError(message)
1561
1562 PICKFIRST = 'PICKFIRST'
1563 MERGE = 'MERGE'
1564 SINGLE = 'SINGLE'
1565 GROUP = 'GROUP'
1566 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1567
1568 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1569 'video': self.params.get('allow_multiple_video_streams', False)}
1570
1571 check_formats = self.params.get('check_formats')
1572
1573 def _parse_filter(tokens):
1574 filter_parts = []
1575 for type, string, start, _, _ in tokens:
1576 if type == tokenize.OP and string == ']':
1577 return ''.join(filter_parts)
1578 else:
1579 filter_parts.append(string)
1580
1581 def _remove_unused_ops(tokens):
1582 # Remove operators that we don't use and join them with the surrounding strings
1583 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1584 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1585 last_string, last_start, last_end, last_line = None, None, None, None
1586 for type, string, start, end, line in tokens:
1587 if type == tokenize.OP and string == '[':
1588 if last_string:
1589 yield tokenize.NAME, last_string, last_start, last_end, last_line
1590 last_string = None
1591 yield type, string, start, end, line
1592 # everything inside brackets will be handled by _parse_filter
1593 for type, string, start, end, line in tokens:
1594 yield type, string, start, end, line
1595 if type == tokenize.OP and string == ']':
1596 break
1597 elif type == tokenize.OP and string in ALLOWED_OPS:
1598 if last_string:
1599 yield tokenize.NAME, last_string, last_start, last_end, last_line
1600 last_string = None
1601 yield type, string, start, end, line
1602 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1603 if not last_string:
1604 last_string = string
1605 last_start = start
1606 last_end = end
1607 else:
1608 last_string += string
1609 if last_string:
1610 yield tokenize.NAME, last_string, last_start, last_end, last_line
1611
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """
            Recursive-descent parser turning a token stream into a list of
            FormatSelector trees.

            The inside_* flags tell a recursive call which closing token
            belongs to the *caller* and must therefore be pushed back
            (via tokens.restore_last_token()) instead of being consumed:
            ')' for groups, '/' and ',' inside a merge, ',' inside a choice.

            Raises the enclosing scope's syntax_error() on malformed input.
            """
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    # A bare name/number is a single-format selector
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        # 'a/b': try a first, fall back to b
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A leading filter ('[...]') implicitly applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # 'a+b': merge (mux) the two selected formats
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors
1669
1670 def _merge(formats_pair):
1671 format_1, format_2 = formats_pair
1672
1673 formats_info = []
1674 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1675 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1676
1677 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1678 get_no_more = {"video": False, "audio": False}
1679 for (i, fmt_info) in enumerate(formats_info):
1680 for aud_vid in ["audio", "video"]:
1681 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1682 if get_no_more[aud_vid]:
1683 formats_info.pop(i)
1684 get_no_more[aud_vid] = True
1685
1686 if len(formats_info) == 1:
1687 return formats_info[0]
1688
1689 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1690 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1691
1692 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1693 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1694
1695 output_ext = self.params.get('merge_output_format')
1696 if not output_ext:
1697 if the_only_video:
1698 output_ext = the_only_video['ext']
1699 elif the_only_audio and not video_fmts:
1700 output_ext = the_only_audio['ext']
1701 else:
1702 output_ext = 'mkv'
1703
1704 new_dict = {
1705 'requested_formats': formats_info,
1706 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1707 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1708 'ext': output_ext,
1709 }
1710
1711 if the_only_video:
1712 new_dict.update({
1713 'width': the_only_video.get('width'),
1714 'height': the_only_video.get('height'),
1715 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1716 'fps': the_only_video.get('fps'),
1717 'vcodec': the_only_video.get('vcodec'),
1718 'vbr': the_only_video.get('vbr'),
1719 'stretched_ratio': the_only_video.get('stretched_ratio'),
1720 })
1721
1722 if the_only_audio:
1723 new_dict.update({
1724 'acodec': the_only_audio.get('acodec'),
1725 'abr': the_only_audio.get('abr'),
1726 })
1727
1728 return new_dict
1729
1730 def _check_formats(formats):
1731 for f in formats:
1732 self.to_screen('[info] Testing format %s' % f['format_id'])
1733 paths = self.params.get('paths', {})
1734 temp_file = os.path.join(
1735 expand_path(paths.get('home', '').strip()),
1736 expand_path(paths.get('temp', '').strip()),
1737 'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
1738 try:
1739 dl, _ = self.dl(temp_file, f, test=True)
1740 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
1741 dl = False
1742 finally:
1743 if os.path.exists(temp_file):
1744 os.remove(temp_file)
1745 if dl:
1746 yield f
1747 else:
1748 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1749
        def _build_selector_function(selector):
            """
            Compile a FormatSelector tree (or a list of them, meaning ',')
            into a callable selector_function(ctx) -> iterable of formats,
            wrapped with the selector's '[...]' filters.

            ctx is a dict with 'formats' and 'incomplete_formats' keys.
            """
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Concatenate the results of every comma-separated selector
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # First alternative that yields anything wins
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if check_formats:
                            formats = _check_formats(formats)
                        for f in formats:
                            yield f
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        # Fold all downloadable formats into one merged format,
                        # starting from the last (best) one
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    # best/worst selectors like b, bv*, ba.2, worstaudio, ...
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        # '.n' suffix selects the n-th best/worst match
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else None)  # b*, w*
                    else:
                        # Not a best/worst spec: match by extension or format_id
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if not formats:
                            return
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        if format_reverse:
                            matches = matches[::-1]
                        if check_formats:
                            matches = list(itertools.islice(_check_formats(matches), format_idx))
                        n = len(matches)
                        if -n <= format_idx - 1 < n:
                            yield matches[format_idx - 1]

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Cartesian product of the two sides; each pair is merged.
                    # deepcopy so each side sees an unmodified context.
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the '[...]' filters before running the selector itself
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
1854
1855 stream = io.BytesIO(format_spec.encode('utf-8'))
1856 try:
1857 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1858 except tokenize.TokenError:
1859 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1860
1861 class TokenIterator(object):
1862 def __init__(self, tokens):
1863 self.tokens = tokens
1864 self.counter = 0
1865
1866 def __iter__(self):
1867 return self
1868
1869 def __next__(self):
1870 if self.counter >= len(self.tokens):
1871 raise StopIteration()
1872 value = self.tokens[self.counter]
1873 self.counter += 1
1874 return value
1875
1876 next = __next__
1877
1878 def restore_last_token(self):
1879 self.counter -= 1
1880
1881 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1882 return _build_selector_function(parsed_selector)
1883
1884 def _calc_headers(self, info_dict):
1885 res = std_headers.copy()
1886
1887 add_headers = info_dict.get('http_headers')
1888 if add_headers:
1889 res.update(add_headers)
1890
1891 cookies = self._calc_cookies(info_dict)
1892 if cookies:
1893 res['Cookie'] = cookies
1894
1895 if 'X-Forwarded-For' not in res:
1896 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1897 if x_forwarded_for_ip:
1898 res['X-Forwarded-For'] = x_forwarded_for_ip
1899
1900 return res
1901
1902 def _calc_cookies(self, info_dict):
1903 pr = sanitized_Request(info_dict['url'])
1904 self.cookiejar.add_cookie_header(pr)
1905 return pr.get_header('Cookie')
1906
1907 @staticmethod
1908 def _sanitize_thumbnails(info_dict):
1909 thumbnails = info_dict.get('thumbnails')
1910 if thumbnails is None:
1911 thumbnail = info_dict.get('thumbnail')
1912 if thumbnail:
1913 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1914 if thumbnails:
1915 thumbnails.sort(key=lambda t: (
1916 t.get('preference') if t.get('preference') is not None else -1,
1917 t.get('width') if t.get('width') is not None else -1,
1918 t.get('height') if t.get('height') is not None else -1,
1919 t.get('id') if t.get('id') is not None else '',
1920 t.get('url')))
1921 for i, t in enumerate(thumbnails):
1922 t['url'] = sanitize_url(t['url'])
1923 if t.get('width') and t.get('height'):
1924 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1925 if t.get('id') is None:
1926 t['id'] = '%d' % i
1927
    def process_video_result(self, info_dict, download=True):
        """
        Sanitize a single extracted video result, select the formats to
        download and (when `download` is true) hand each selected format to
        process_info().

        @param info_dict  Extractor result; must have _type 'video' (or
                          unset) and the 'id' and 'title' fields
        @param download   Whether the selected formats should be downloaded
        @returns          The mutated info_dict, updated with the last
                          selected format; returns early (None) in the
                          list_thumbnails/listsubtitles/listformats modes
        Raises ExtractorError on missing mandatory fields or when no
        (requested) format is available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that the extractor returned a wrongly-typed field
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field value to str, with a warning
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int/None, with a warning
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        # Thumbnails are sorted worst-to-best, so the last one is the best
        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the *_date fields from their timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('No video formats found!')
            else:
                self.report_warning('No video formats found!')

        def is_wellformed(f):
            # A format without a URL cannot be downloaded; drop it
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats and formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('listformats'):
            if not info_dict.get('formats'):
                raise ExtractorError('No video formats found', expected=True)
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True)
            else:
                self.report_warning('Requested format is not available')
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2166
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format

        @param video_id            Used only in warning messages
        @param normal_subtitles    lang -> list of subtitle-format dicts (or None)
        @param automatic_captions  lang -> list of subtitle-format dicts (or None)
        @returns  dict mapping each selected language to the chosen
                  subtitle-format dict, or None if nothing was requested
        """
        available_subs = {}
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            # Normal subtitles take precedence over automatic captions
            for lang, cap_info in automatic_captions.items():
                if lang not in available_subs:
                    available_subs[lang] = cap_info

        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
                available_subs):
            return None

        all_sub_langs = available_subs.keys()
        if self.params.get('allsubtitles', False):
            requested_langs = all_sub_langs
        elif self.params.get('subtitleslangs', False):
            requested_langs = set()
            for lang in self.params.get('subtitleslangs'):
                if lang == 'all':
                    requested_langs.update(all_sub_langs)
                    continue
                # A leading '-' removes matching languages from the selection
                discard = lang[0] == '-'
                if discard:
                    lang = lang[1:]
                # Each requested language is treated as a regex matched
                # against the full available language code
                current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
                if discard:
                    for lang in current_langs:
                        requested_langs.discard(lang)
                else:
                    requested_langs.update(current_langs)
        elif 'en' in available_subs:
            requested_langs = ['en']
        else:
            # Fall back to the first available language
            requested_langs = [list(all_sub_langs)[0]]
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        subs = {}
        for lang in requested_langs:
            formats = available_subs.get(lang)
            if formats is None:
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
                continue
            # Try each preferred ext in order; 'best' means the last
            # (best) listed format. The for/else falls back to the best
            # format when no preference matched.
            for ext in formats_preference:
                if ext == 'best':
                    f = formats[-1]
                    break
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                if matches:
                    f = matches[-1]
                    break
            else:
                f = formats[-1]
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
            subs[lang] = f
        return subs
2229
2230 def __forced_printings(self, info_dict, filename, incomplete):
2231 def print_mandatory(field, actual_field=None):
2232 if actual_field is None:
2233 actual_field = field
2234 if (self.params.get('force%s' % field, False)
2235 and (not incomplete or info_dict.get(actual_field) is not None)):
2236 self.to_stdout(info_dict[actual_field])
2237
2238 def print_optional(field):
2239 if (self.params.get('force%s' % field, False)
2240 and info_dict.get(field) is not None):
2241 self.to_stdout(info_dict[field])
2242
2243 info_dict = info_dict.copy()
2244 if filename is not None:
2245 info_dict['filename'] = filename
2246 if info_dict.get('requested_formats') is not None:
2247 # For RTMP URLs, also include the playpath
2248 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2249 elif 'url' in info_dict:
2250 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2251
2252 for tmpl in self.params.get('forceprint', []):
2253 if re.match(r'\w+$', tmpl):
2254 tmpl = '%({})s'.format(tmpl)
2255 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2256 self.to_stdout(tmpl % info_copy)
2257
2258 print_mandatory('title')
2259 print_mandatory('id')
2260 print_mandatory('url', 'urls')
2261 print_optional('thumbnail')
2262 print_optional('description')
2263 print_optional('filename')
2264 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2265 self.to_stdout(formatSeconds(info_dict['duration']))
2266 print_mandatory('format')
2267
2268 if self.params.get('forcejson', False):
2269 self.post_extract(info_dict)
2270 self.to_stdout(json.dumps(info_dict, default=repr))
2271
2272 def dl(self, name, info, subtitle=False, test=False):
2273
2274 if test:
2275 verbose = self.params.get('verbose')
2276 params = {
2277 'test': True,
2278 'quiet': not verbose,
2279 'verbose': verbose,
2280 'noprogress': not verbose,
2281 'nopart': True,
2282 'skip_unavailable_fragments': False,
2283 'keep_fragments': False,
2284 'overwrites': True,
2285 '_no_ytdl_file': True,
2286 }
2287 else:
2288 params = self.params
2289 fd = get_suitable_downloader(info, params)(self, params)
2290 if not test:
2291 for ph in self._progress_hooks:
2292 fd.add_progress_hook(ph)
2293 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2294 self.write_debug('Invoking downloader on "%s"' % urls)
2295 new_info = dict(info)
2296 if new_info.get('http_headers') is None:
2297 new_info['http_headers'] = self._calc_headers(new_info)
2298 return fd.download(name, new_info, subtitle)
2299
2300 def process_info(self, info_dict):
2301 """Process a single resolved IE result."""
2302
2303 assert info_dict.get('_type', 'video') == 'video'
2304
2305 info_dict.setdefault('__postprocessors', [])
2306
2307 max_downloads = self.params.get('max_downloads')
2308 if max_downloads is not None:
2309 if self._num_downloads >= int(max_downloads):
2310 raise MaxDownloadsReached()
2311
2312 # TODO: backward compatibility, to be removed
2313 info_dict['fulltitle'] = info_dict['title']
2314
2315 if 'format' not in info_dict:
2316 info_dict['format'] = info_dict['ext']
2317
2318 if self._match_entry(info_dict) is not None:
2319 return
2320
2321 self.post_extract(info_dict)
2322 self._num_downloads += 1
2323
2324 # info_dict['_filename'] needs to be set for backward compatibility
2325 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2326 temp_filename = self.prepare_filename(info_dict, 'temp')
2327 files_to_move = {}
2328
2329 # Forced printings
2330 self.__forced_printings(info_dict, full_filename, incomplete=False)
2331
2332 if self.params.get('simulate', False):
2333 if self.params.get('force_write_download_archive', False):
2334 self.record_download_archive(info_dict)
2335
2336 # Do nothing else if in simulate mode
2337 return
2338
2339 if full_filename is None:
2340 return
2341
2342 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2343 return
2344 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2345 return
2346
2347 if self.params.get('writedescription', False):
2348 descfn = self.prepare_filename(info_dict, 'description')
2349 if not self._ensure_dir_exists(encodeFilename(descfn)):
2350 return
2351 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
2352 self.to_screen('[info] Video description is already present')
2353 elif info_dict.get('description') is None:
2354 self.report_warning('There\'s no description to write.')
2355 else:
2356 try:
2357 self.to_screen('[info] Writing video description to: ' + descfn)
2358 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2359 descfile.write(info_dict['description'])
2360 except (OSError, IOError):
2361 self.report_error('Cannot write description file ' + descfn)
2362 return
2363
2364 if self.params.get('writeannotations', False):
2365 annofn = self.prepare_filename(info_dict, 'annotation')
2366 if not self._ensure_dir_exists(encodeFilename(annofn)):
2367 return
2368 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2369 self.to_screen('[info] Video annotations are already present')
2370 elif not info_dict.get('annotations'):
2371 self.report_warning('There are no annotations to write.')
2372 else:
2373 try:
2374 self.to_screen('[info] Writing video annotations to: ' + annofn)
2375 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2376 annofile.write(info_dict['annotations'])
2377 except (KeyError, TypeError):
2378 self.report_warning('There are no annotations to write.')
2379 except (OSError, IOError):
2380 self.report_error('Cannot write annotations file: ' + annofn)
2381 return
2382
2383 subtitles_are_requested = any([self.params.get('writesubtitles', False),
2384 self.params.get('writeautomaticsub')])
2385
2386 if subtitles_are_requested and info_dict.get('requested_subtitles'):
2387 # subtitles download errors are already managed as troubles in relevant IE
2388 # that way it will silently go on when used with unsupporting IE
2389 subtitles = info_dict['requested_subtitles']
2390 # ie = self.get_info_extractor(info_dict['extractor_key'])
2391 for sub_lang, sub_info in subtitles.items():
2392 sub_format = sub_info['ext']
2393 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2394 sub_filename_final = subtitles_filename(
2395 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
2396 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
2397 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
2398 sub_info['filepath'] = sub_filename
2399 files_to_move[sub_filename] = sub_filename_final
2400 else:
2401 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
2402 if sub_info.get('data') is not None:
2403 try:
2404 # Use newline='' to prevent conversion of newline characters
2405 # See https://github.com/ytdl-org/youtube-dl/issues/10268
2406 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2407 subfile.write(sub_info['data'])
2408 sub_info['filepath'] = sub_filename
2409 files_to_move[sub_filename] = sub_filename_final
2410 except (OSError, IOError):
2411 self.report_error('Cannot write subtitles file ' + sub_filename)
2412 return
2413 else:
2414 try:
2415 self.dl(sub_filename, sub_info.copy(), subtitle=True)
2416 sub_info['filepath'] = sub_filename
2417 files_to_move[sub_filename] = sub_filename_final
2418 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
2419 self.report_warning('Unable to download subtitle for "%s": %s' %
2420 (sub_lang, error_to_compat_str(err)))
2421 continue
2422
2423 if self.params.get('writeinfojson', False):
2424 infofn = self.prepare_filename(info_dict, 'infojson')
2425 if not self._ensure_dir_exists(encodeFilename(infofn)):
2426 return
2427 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
2428 self.to_screen('[info] Video metadata is already present')
2429 else:
2430 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
2431 try:
2432 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
2433 except (OSError, IOError):
2434 self.report_error('Cannot write video metadata to JSON file ' + infofn)
2435 return
2436 info_dict['__infojson_filename'] = infofn
2437
2438 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2439 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2440 thumb_filename = replace_extension(
2441 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
2442 files_to_move[thumb_filename_temp] = thumb_filename
2443
2444 # Write internet shortcut files
2445 url_link = webloc_link = desktop_link = False
2446 if self.params.get('writelink', False):
2447 if sys.platform == "darwin": # macOS.
2448 webloc_link = True
2449 elif sys.platform.startswith("linux"):
2450 desktop_link = True
2451 else: # if sys.platform in ['win32', 'cygwin']:
2452 url_link = True
2453 if self.params.get('writeurllink', False):
2454 url_link = True
2455 if self.params.get('writewebloclink', False):
2456 webloc_link = True
2457 if self.params.get('writedesktoplink', False):
2458 desktop_link = True
2459
2460 if url_link or webloc_link or desktop_link:
2461 if 'webpage_url' not in info_dict:
2462 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2463 return
2464 ascii_url = iri_to_uri(info_dict['webpage_url'])
2465
2466 def _write_link_file(extension, template, newline, embed_filename):
2467 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
2468 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2469 self.to_screen('[info] Internet shortcut is already present')
2470 else:
2471 try:
2472 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2473 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2474 template_vars = {'url': ascii_url}
2475 if embed_filename:
2476 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2477 linkfile.write(template % template_vars)
2478 except (OSError, IOError):
2479 self.report_error('Cannot write internet shortcut ' + linkfn)
2480 return False
2481 return True
2482
2483 if url_link:
2484 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2485 return
2486 if webloc_link:
2487 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2488 return
2489 if desktop_link:
2490 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2491 return
2492
2493 try:
2494 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2495 except PostProcessingError as err:
2496 self.report_error('Preprocessing: %s' % str(err))
2497 return
2498
2499 must_record_download_archive = False
2500 if self.params.get('skip_download', False):
2501 info_dict['filepath'] = temp_filename
2502 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2503 info_dict['__files_to_move'] = files_to_move
2504 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2505 else:
2506 # Download
2507 try:
2508
2509 def existing_file(*filepaths):
2510 ext = info_dict.get('ext')
2511 final_ext = self.params.get('final_ext', ext)
2512 existing_files = []
2513 for file in orderedSet(filepaths):
2514 if final_ext != ext:
2515 converted = replace_extension(file, final_ext, ext)
2516 if os.path.exists(encodeFilename(converted)):
2517 existing_files.append(converted)
2518 if os.path.exists(encodeFilename(file)):
2519 existing_files.append(file)
2520
2521 if not existing_files or self.params.get('overwrites', False):
2522 for file in orderedSet(existing_files):
2523 self.report_file_delete(file)
2524 os.remove(encodeFilename(file))
2525 return None
2526
2527 self.report_file_already_downloaded(existing_files[0])
2528 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2529 return existing_files[0]
2530
2531 success = True
2532 if info_dict.get('requested_formats') is not None:
2533
2534 def compatible_formats(formats):
2535 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2536 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2537 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2538 if len(video_formats) > 2 or len(audio_formats) > 2:
2539 return False
2540
2541 # Check extension
2542 exts = set(format.get('ext') for format in formats)
2543 COMPATIBLE_EXTS = (
2544 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2545 set(('webm',)),
2546 )
2547 for ext_sets in COMPATIBLE_EXTS:
2548 if ext_sets.issuperset(exts):
2549 return True
2550 # TODO: Check acodec/vcodec
2551 return False
2552
2553 requested_formats = info_dict['requested_formats']
2554 old_ext = info_dict['ext']
2555 if self.params.get('merge_output_format') is None:
2556 if not compatible_formats(requested_formats):
2557 info_dict['ext'] = 'mkv'
2558 self.report_warning(
2559 'Requested formats are incompatible for merge and will be merged into mkv.')
2560 if (info_dict['ext'] == 'webm'
2561 and self.params.get('writethumbnail', False)
2562 and info_dict.get('thumbnails')):
2563 info_dict['ext'] = 'mkv'
2564 self.report_warning(
2565 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
2566
2567 def correct_ext(filename):
2568 filename_real_ext = os.path.splitext(filename)[1][1:]
2569 filename_wo_ext = (
2570 os.path.splitext(filename)[0]
2571 if filename_real_ext == old_ext
2572 else filename)
2573 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2574
2575 # Ensure filename always has a correct extension for successful merge
2576 full_filename = correct_ext(full_filename)
2577 temp_filename = correct_ext(temp_filename)
2578 dl_filename = existing_file(full_filename, temp_filename)
2579 info_dict['__real_download'] = False
2580
2581 _protocols = set(determine_protocol(f) for f in requested_formats)
2582 if len(_protocols) == 1:
2583 info_dict['protocol'] = _protocols.pop()
2584 directly_mergable = (
2585 'no-direct-merge' not in self.params.get('compat_opts', [])
2586 and info_dict.get('protocol') is not None # All requested formats have same protocol
2587 and not self.params.get('allow_unplayable_formats')
2588 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2589 if directly_mergable:
2590 info_dict['url'] = requested_formats[0]['url']
2591 # Treat it as a single download
2592 dl_filename = existing_file(full_filename, temp_filename)
2593 if dl_filename is None:
2594 success, real_download = self.dl(temp_filename, info_dict)
2595 info_dict['__real_download'] = real_download
2596 else:
2597 downloaded = []
2598 merger = FFmpegMergerPP(self)
2599 if self.params.get('allow_unplayable_formats'):
2600 self.report_warning(
2601 'You have requested merging of multiple formats '
2602 'while also allowing unplayable formats to be downloaded. '
2603 'The formats won\'t be merged to prevent data corruption.')
2604 elif not merger.available:
2605 self.report_warning(
2606 'You have requested merging of multiple formats but ffmpeg is not installed. '
2607 'The formats won\'t be merged.')
2608
2609 if dl_filename is None:
2610 for f in requested_formats:
2611 new_info = dict(info_dict)
2612 del new_info['requested_formats']
2613 new_info.update(f)
2614 fname = prepend_extension(
2615 self.prepare_filename(new_info, 'temp'),
2616 'f%s' % f['format_id'], new_info['ext'])
2617 if not self._ensure_dir_exists(fname):
2618 return
2619 downloaded.append(fname)
2620 partial_success, real_download = self.dl(fname, new_info)
2621 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2622 success = success and partial_success
2623 if merger.available and not self.params.get('allow_unplayable_formats'):
2624 info_dict['__postprocessors'].append(merger)
2625 info_dict['__files_to_merge'] = downloaded
2626 # Even if there were no downloads, it is being merged only now
2627 info_dict['__real_download'] = True
2628 else:
2629 for file in downloaded:
2630 files_to_move[file] = None
2631 else:
2632 # Just a single file
2633 dl_filename = existing_file(full_filename, temp_filename)
2634 if dl_filename is None:
2635 success, real_download = self.dl(temp_filename, info_dict)
2636 info_dict['__real_download'] = real_download
2637
2638 dl_filename = dl_filename or temp_filename
2639 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2640
2641 except network_exceptions as err:
2642 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2643 return
2644 except (OSError, IOError) as err:
2645 raise UnavailableVideoError(err)
2646 except (ContentTooShortError, ) as err:
2647 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2648 return
2649
2650 if success and full_filename != '-':
2651 # Fixup content
2652 fixup_policy = self.params.get('fixup')
2653 if fixup_policy is None:
2654 fixup_policy = 'detect_or_warn'
2655
2656 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
2657
2658 stretched_ratio = info_dict.get('stretched_ratio')
2659 if stretched_ratio is not None and stretched_ratio != 1:
2660 if fixup_policy == 'warn':
2661 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2662 info_dict['id'], stretched_ratio))
2663 elif fixup_policy == 'detect_or_warn':
2664 stretched_pp = FFmpegFixupStretchedPP(self)
2665 if stretched_pp.available:
2666 info_dict['__postprocessors'].append(stretched_pp)
2667 else:
2668 self.report_warning(
2669 '%s: Non-uniform pixel ratio (%s). %s'
2670 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2671 else:
2672 assert fixup_policy in ('ignore', 'never')
2673
2674 if (info_dict.get('requested_formats') is None
2675 and info_dict.get('container') == 'm4a_dash'
2676 and info_dict.get('ext') == 'm4a'):
2677 if fixup_policy == 'warn':
2678 self.report_warning(
2679 '%s: writing DASH m4a. '
2680 'Only some players support this container.'
2681 % info_dict['id'])
2682 elif fixup_policy == 'detect_or_warn':
2683 fixup_pp = FFmpegFixupM4aPP(self)
2684 if fixup_pp.available:
2685 info_dict['__postprocessors'].append(fixup_pp)
2686 else:
2687 self.report_warning(
2688 '%s: writing DASH m4a. '
2689 'Only some players support this container. %s'
2690 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2691 else:
2692 assert fixup_policy in ('ignore', 'never')
2693
2694 if ('protocol' in info_dict
2695 and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
2696 if fixup_policy == 'warn':
2697 self.report_warning('%s: malformed AAC bitstream detected.' % (
2698 info_dict['id']))
2699 elif fixup_policy == 'detect_or_warn':
2700 fixup_pp = FFmpegFixupM3u8PP(self)
2701 if fixup_pp.available:
2702 info_dict['__postprocessors'].append(fixup_pp)
2703 else:
2704 self.report_warning(
2705 '%s: malformed AAC bitstream detected. %s'
2706 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2707 else:
2708 assert fixup_policy in ('ignore', 'never')
2709
2710 try:
2711 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
2712 except PostProcessingError as err:
2713 self.report_error('Postprocessing: %s' % str(err))
2714 return
2715 try:
2716 for ph in self._post_hooks:
2717 ph(info_dict['filepath'])
2718 except Exception as err:
2719 self.report_error('post hooks: %s' % str(err))
2720 return
2721 must_record_download_archive = True
2722
2723 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2724 self.record_download_archive(info_dict)
2725 max_downloads = self.params.get('max_downloads')
2726 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2727 raise MaxDownloadsReached()
2728
2729 def download(self, url_list):
2730 """Download a given list of URLs."""
2731 outtmpl = self.outtmpl_dict['default']
2732 if (len(url_list) > 1
2733 and outtmpl != '-'
2734 and '%' not in outtmpl
2735 and self.params.get('max_downloads') != 1):
2736 raise SameFileError(outtmpl)
2737
2738 for url in url_list:
2739 try:
2740 # It also downloads the videos
2741 res = self.extract_info(
2742 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2743 except UnavailableVideoError:
2744 self.report_error('unable to download video')
2745 except MaxDownloadsReached:
2746 self.to_screen('[info] Maximum number of downloaded files reached')
2747 raise
2748 except ExistingVideoReached:
2749 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
2750 raise
2751 except RejectedVideoReached:
2752 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
2753 raise
2754 else:
2755 if self.params.get('dump_single_json', False):
2756 self.post_extract(res)
2757 self.to_stdout(json.dumps(res, default=repr))
2758
2759 return self._download_retcode
2760
2761 def download_with_info_file(self, info_filename):
2762 with contextlib.closing(fileinput.FileInput(
2763 [info_filename], mode='r',
2764 openhook=fileinput.hook_encoded('utf-8'))) as f:
2765 # FileInput doesn't have a read method, we can't call json.load
2766 info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
2767 try:
2768 self.process_ie_result(info, download=True)
2769 except (DownloadError, EntryNotInPlaylist):
2770 webpage_url = info.get('webpage_url')
2771 if webpage_url is not None:
2772 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2773 return self.download([webpage_url])
2774 else:
2775 raise
2776 return self._download_retcode
2777
2778 @staticmethod
2779 def filter_requested_info(info_dict, actually_filter=True):
2780 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2781 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2782 if actually_filter:
2783 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries')
2784 empty_values = (None, {}, [], set(), tuple())
2785 reject = lambda k, v: k not in keep_keys and (
2786 k.startswith('_') or k in remove_keys or v in empty_values)
2787 else:
2788 info_dict['epoch'] = int(time.time())
2789 reject = lambda k, v: k in remove_keys
2790 filter_fn = lambda obj: (
2791 list(map(filter_fn, obj)) if isinstance(obj, (list, tuple, set))
2792 else obj if not isinstance(obj, dict)
2793 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2794 return filter_fn(info_dict)
2795
2796 def run_pp(self, pp, infodict):
2797 files_to_delete = []
2798 if '__files_to_move' not in infodict:
2799 infodict['__files_to_move'] = {}
2800 files_to_delete, infodict = pp.run(infodict)
2801 if not files_to_delete:
2802 return infodict
2803
2804 if self.params.get('keepvideo', False):
2805 for f in files_to_delete:
2806 infodict['__files_to_move'].setdefault(f, '')
2807 else:
2808 for old_filename in set(files_to_delete):
2809 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2810 try:
2811 os.remove(encodeFilename(old_filename))
2812 except (IOError, OSError):
2813 self.report_warning('Unable to remove downloaded original file')
2814 if old_filename in infodict['__files_to_move']:
2815 del infodict['__files_to_move'][old_filename]
2816 return infodict
2817
2818 @staticmethod
2819 def post_extract(info_dict):
2820 def actual_post_extract(info_dict):
2821 if info_dict.get('_type') in ('playlist', 'multi_video'):
2822 for video_dict in info_dict.get('entries', {}):
2823 actual_post_extract(video_dict or {})
2824 return
2825
2826 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2827 extra = post_extractor().items()
2828 info_dict.update(extra)
2829 info_dict.pop('__post_extractor', None)
2830
2831 original_infodict = info_dict.get('__original_infodict') or {}
2832 original_infodict.update(extra)
2833 original_infodict.pop('__post_extractor', None)
2834
2835 actual_post_extract(info_dict or {})
2836
2837 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2838 info = dict(ie_info)
2839 info['__files_to_move'] = files_to_move or {}
2840 for pp in self._pps[key]:
2841 info = self.run_pp(pp, info)
2842 return info, info.pop('__files_to_move', None)
2843
2844 def post_process(self, filename, ie_info, files_to_move=None):
2845 """Run all the postprocessors on the given file."""
2846 info = dict(ie_info)
2847 info['filepath'] = filename
2848 info['__files_to_move'] = files_to_move or {}
2849
2850 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
2851 info = self.run_pp(pp, info)
2852 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2853 del info['__files_to_move']
2854 for pp in self._pps['after_move']:
2855 info = self.run_pp(pp, info)
2856 return info
2857
2858 def _make_archive_id(self, info_dict):
2859 video_id = info_dict.get('id')
2860 if not video_id:
2861 return
2862 # Future-proof against any change in case
2863 # and backwards compatibility with prior versions
2864 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2865 if extractor is None:
2866 url = str_or_none(info_dict.get('url'))
2867 if not url:
2868 return
2869 # Try to find matching extractor for the URL and take its ie_key
2870 for ie in self._ies:
2871 if ie.suitable(url):
2872 extractor = ie.ie_key()
2873 break
2874 else:
2875 return
2876 return '%s %s' % (extractor.lower(), video_id)
2877
2878 def in_download_archive(self, info_dict):
2879 fn = self.params.get('download_archive')
2880 if fn is None:
2881 return False
2882
2883 vid_id = self._make_archive_id(info_dict)
2884 if not vid_id:
2885 return False # Incomplete video information
2886
2887 return vid_id in self.archive
2888
2889 def record_download_archive(self, info_dict):
2890 fn = self.params.get('download_archive')
2891 if fn is None:
2892 return
2893 vid_id = self._make_archive_id(info_dict)
2894 assert vid_id
2895 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2896 archive_file.write(vid_id + '\n')
2897 self.archive.add(vid_id)
2898
2899 @staticmethod
2900 def format_resolution(format, default='unknown'):
2901 if format.get('vcodec') == 'none':
2902 return 'audio only'
2903 if format.get('resolution') is not None:
2904 return format['resolution']
2905 if format.get('width') and format.get('height'):
2906 res = '%dx%d' % (format['width'], format['height'])
2907 elif format.get('height'):
2908 res = '%sp' % format['height']
2909 elif format.get('width'):
2910 res = '%dx?' % format['width']
2911 else:
2912 res = default
2913 return res
2914
    def _format_note(self, fdict):
        """Build the legacy (youtube-dl style) "note" column for one format dict.

        Pieces are appended in a fixed order; once `res` is non-empty most
        sections prepend ', ' so the order of these branches matters.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec with the vbr value appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No codec known but both bitrates present: label the video bitrate
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2970
2971 def _format_note_table(self, f):
2972 def join_fields(*vargs):
2973 return ', '.join((val for val in vargs if val != ''))
2974
2975 return join_fields(
2976 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2977 format_field(f, 'language', '[%s]'),
2978 format_field(f, 'format_note'),
2979 format_field(f, 'container', ignore=(None, f.get('ext'))),
2980 format_field(f, 'asr', '%5dHz'))
2981
    def list_formats(self, info_dict):
        """Print the available formats for a video as a table on screen."""
        formats = info_dict.get('formats', [info_dict])
        # New (yt-dlp) table layout unless disabled by compat option or
        # list_formats_as_table=False
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('list_formats_as_table', True) is not False)
        if new_format:
            # Formats with preference < -1000 are hidden from the listing
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    self._format_note_table(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
        else:
            # Legacy youtube-dl style listing
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
                header_line,
                table,
                delim=new_format,
                extraGap=(0 if new_format else 1),
                hideEmpty=new_format)))
3027
3028 def list_thumbnails(self, info_dict):
3029 thumbnails = info_dict.get('thumbnails')
3030 if not thumbnails:
3031 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3032 return
3033
3034 self.to_screen(
3035 '[info] Thumbnails for %s:' % info_dict['id'])
3036 self.to_screen(render_table(
3037 ['ID', 'width', 'height', 'URL'],
3038 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3039
3040 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3041 if not subtitles:
3042 self.to_screen('%s has no %s' % (video_id, name))
3043 return
3044 self.to_screen(
3045 'Available %s for %s:' % (name, video_id))
3046
3047 def _row(lang, formats):
3048 exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
3049 if len(set(names)) == 1:
3050 names = [] if names[0] == 'unknown' else names[:1]
3051 return [lang, ', '.join(names), ', '.join(exts)]
3052
3053 self.to_screen(render_table(
3054 ['Language', 'Name', 'Formats'],
3055 [_row(lang, formats) for lang, formats in subtitles.items()],
3056 hideEmpty=True))
3057
3058 def urlopen(self, req):
3059 """ Start an HTTP download """
3060 if isinstance(req, compat_basestring):
3061 req = sanitized_Request(req)
3062 return self._opener.open(req, timeout=self._socket_timeout)
3063
    def print_debug_header(self):
        """Write the verbose debug header (versions, encodings, proxies) to screen."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        # How yt-dlp is being run: frozen exe, zip bundle, or plain source
        source = (
            '(exe)' if hasattr(sys, 'frozen')
            else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
            else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
            else '')
        self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if _PLUGIN_CLASSES:
            self._write_string(
                '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        try:
            # Report the git commit when running from a checkout; best-effort only
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version triple when available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxies from all installed opener handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE(review): everything below is unreachable after the `return`
            # above — the upstream version check is deliberately dead code here
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3153
    def _setup_opener(self):
        """Create and install the urllib opener: cookies, proxies, TLS and
        custom handlers, with the file:// scheme disabled for security."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 10 minutes
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            # Load existing cookies only if the file is readable
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicit empty --proxy disables all proxies
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3206
3207 def encode(self, s):
3208 if isinstance(s, bytes):
3209 return s # Already encoded
3210
3211 try:
3212 return s.encode(self.get_encoding())
3213 except UnicodeEncodeError as err:
3214 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3215 raise
3216
3217 def get_encoding(self):
3218 encoding = self.params.get('encoding')
3219 if encoding is None:
3220 encoding = preferredencoding()
3221 return encoding
3222
    def _write_thumbnails(self, info_dict, filename):  # return the extensions
        """Download the thumbnail(s) next to *filename*; return list of written extensions."""
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails = []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
        # Only disambiguate filenames by thumbnail id when writing several
        multiple = write_all and len(thumbnails) > 1

        ret = []
        # write_all: keep extractor order; otherwise iterate from the end
        # (best thumbnail first) and stop after the first success
        for t in thumbnails[::1 if write_all else -1]:
            thumb_ext = determine_ext(t['url'], 'jpg')
            suffix = '%s.' % t['id'] if multiple else ''
            thumb_display_id = '%s ' % t['id'] if multiple else ''
            thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append(suffix + thumb_ext)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    # Best-effort: a failed thumbnail download is only a warning
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
            if ret and not write_all:
                break
        return ret