]> jfr.im git - yt-dlp.git/blob - youtube_dl/__init__.py
Merge remote-tracking branch 'd912e3/golem'
[yt-dlp.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 __authors__ = (
5 'Ricardo Garcia Gonzalez',
6 'Danny Colligan',
7 'Benjamin Johnson',
8 'Vasyl\' Vavrychuk',
9 'Witold Baryluk',
10 'Paweł Paprota',
11 'Gergely Imreh',
12 'Rogério Brito',
13 'Philipp Hagemeister',
14 'Sören Schulze',
15 'Kevin Ngo',
16 'Ori Avtalion',
17 'shizeeg',
18 'Filippo Valsorda',
19 'Christian Albrecht',
20 'Dave Vasilevsky',
21 'Jaime Marquínez Ferrándiz',
22 'Jeff Crouse',
23 'Osama Khalid',
24 'Michael Walter',
25 'M. Yasoob Ullah Khalid',
26 'Julien Fraichard',
27 'Johny Mo Swag',
28 'Axel Noack',
29 'Albert Kim',
30 'Pierre Rudloff',
31 'Huarong Huo',
32 'Ismael Mejía',
33 'Steffan \'Ruirize\' James',
34 'Andras Elso',
35 'Jelle van der Waa',
36 'Marcin Cieślak',
37 'Anton Larionov',
38 'Takuya Tsuchida',
39 'Sergey M.',
40 'Michael Orlitzky',
41 'Chris Gahan',
42 'Saimadhav Heblikar',
43 'Mike Col',
44 'Oleg Prutz',
45 'pulpe',
46 'Andreas Schmitz',
47 'Michael Kaiser',
48 'Niklas Laxström',
49 'David Triendl',
50 'Anthony Weems',
51 'David Wagner',
52 'Juan C. Olivares',
53 'Mattias Harrysson',
54 'phaer',
55 'Sainyam Kapoor',
56 'Nicolas Évrard',
57 'Jason Normore',
58 'Hoje Lee',
59 'Adam Thalhammer',
60 'Georg Jähnig',
61 'Ralf Haring',
62 'Koki Takahashi',
63 'Ariset Llerena',
64 'Adam Malcontenti-Wilson',
65 'Tobias Bell',
66 'Naglis Jonaitis',
67 'Charles Chen',
68 'Hassaan Ali',
69 'Dobrosław Żybort',
70 'David Fabijan',
71 'Sebastian Haas',
72 'Alexander Kirk',
73 'Erik Johnson',
74 'Keith Beckman',
75 'Ole Ernst',
76 'Aaron McDaniel (mcd1992)',
77 'Magnus Kolstad',
78 'Hari Padmanaban',
79 'Carlos Ramos',
80 '5moufl',
81 'lenaten',
82 )
83
84 __license__ = 'Public Domain'
85
86 import codecs
87 import io
88 import os
89 import random
90 import sys
91
92
93 from .options import (
94 parseOpts,
95 )
96 from .utils import (
97 compat_getpass,
98 compat_print,
99 DateRange,
100 DEFAULT_OUTTMPL,
101 decodeOption,
102 DownloadError,
103 MaxDownloadsReached,
104 preferredencoding,
105 read_batch_urls,
106 SameFileError,
107 setproctitle,
108 std_headers,
109 write_string,
110 )
111 from .update import update_self
112 from .downloader import (
113 FileDownloader,
114 )
115 from .extractor import gen_extractors
116 from .YoutubeDL import YoutubeDL
117 from .postprocessor import (
118 AtomicParsleyPP,
119 FFmpegAudioFixPP,
120 FFmpegMetadataPP,
121 FFmpegVideoConvertor,
122 FFmpegExtractAudioPP,
123 FFmpegEmbedSubtitlePP,
124 XAttrMetadataPP,
125 ExecAfterDownloadPP,
126 )
127
128
def _real_main(argv=None):
    """Parse the command line, configure a YoutubeDL instance and run it.

    The function applies header-related options to ``std_headers``, gathers
    URLs from the batch file and the positional arguments, validates and
    normalises the remaining options, registers the requested
    post-processors and finally starts the download.

    argv -- argument list handed to ``parseOpts``.

    This function does not return normally: it always finishes through
    ``sys.exit`` (or through ``parser.error`` for invalid options, which the
    code below relies on to abort instead of returning).  Invalid playlist
    bounds raise ``ValueError``.  Exceptions raised while downloading, other
    than ``MaxDownloadsReached``, are not handled here.
    """
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/rg3/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    setproctitle(u'youtube-dl')

    parser, opts, args = parseOpts(argv)

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Set referer
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

    # Custom HTTP headers
    if opts.headers is not None:
        for h in opts.headers:
            # Searching from index 1 rejects headers with an empty key.
            if h.find(':', 1) < 0:
                parser.error(u'wrong header formatting, it should be key:value, not "%s"' % h)
            # Split on the first colon only: the value may itself contain
            # colons (URLs, timestamps, ...), and a larger maxsplit would
            # yield three parts and break the two-name unpacking.
            key, value = h.split(':', 1)
            if opts.verbose:
                write_string(u'[debug] Adding header from command line option %s:%s\n' % (key, value))
            std_headers[key] = value

    # Dump user agent
    if opts.dump_user_agent:
        compat_print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batch_urls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                # A batch file named "-" means: read the URLs from stdin
                batchfd = sys.stdin
            else:
                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
            batch_urls = read_batch_urls(batchfd)
            if opts.verbose:
                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = batch_urls + args
    all_urls = [url.strip() for url in all_urls]
    _enc = preferredencoding()
    # Command-line arguments may still be byte strings; decode those only
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            # Also show which of the given URLs this extractor would handle
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                compat_print(u' ' + mu)
        sys.exit(0)
    if opts.list_extractor_descriptions:
        # Building blocks for the randomly generated search examples
        _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise', u'sleeping bunny')
        _COUNTS = (u'', u'5', u'10', u'all')
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            # An IE_DESC of False excludes the extractor from the listing
            if desc is False:
                continue
            if hasattr(ie, 'SEARCH_KEY'):
                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            compat_print(desc)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing\n')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error(u'using output template conflicts with using title, video ID or auto number')
    if opts.usetitle and opts.useid:
        parser.error(u'using title conflicts with using video ID')
    if opts.username is not None and opts.password is None:
        # Username without password: ask for the password interactively
        opts.password = compat_getpass(u'Type account password and press [Return]: ')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.min_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
        if numeric_limit is None:
            parser.error(u'invalid min_filesize specified')
        opts.min_filesize = numeric_limit
    if opts.max_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
        if numeric_limit is None:
            parser.error(u'invalid max_filesize specified')
        opts.max_filesize = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = int(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error(u'invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    # NOTE(review): unlike the checks above, the two playlist checks raise
    # ValueError instead of calling parser.error; kept as is so the
    # exception type seen by callers does not change.
    if opts.playliststart <= 0:
        raise ValueError(u'Playlist start must be positive')
    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
        raise ValueError(u'Playlist end must be greater than playlist start')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error(u'invalid audio format specified')
    if opts.audioquality:
        # Accept values such as "128k" / "128K" by dropping the unit
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error(u'invalid audio quality specified')
    if opts.recodevideo is not None:
        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
            parser.error(u'invalid video recode format specified')
    if opts.date is not None:
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)
    if opts.default_search not in ('auto', 'auto_warning', 'error', 'fixup_error', None) and ':' not in opts.default_search:
        parser.error(u'--default-search invalid; did you forget a colon (:) at the end?')

    # Do not download videos when there are audio-only formats
    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = 'bestaudio/best'

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    # (Explicit comparison kept on purpose: a value of None does not match.)
    if opts.allsubtitles and (opts.writeautomaticsub == False):
        opts.writesubtitles = True

    if sys.version_info < (3,):
        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
        if opts.outtmpl is not None:
            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
    # The first alternative that yields a non-empty template wins; an
    # explicit (non-empty) --output template takes precedence over the rest.
    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
               or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
               or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
               or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
               or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
               or (opts.useid and u'%(id)s.%(ext)s')
               or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
               or DEFAULT_OUTTMPL)
    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error(u'Cannot download a video and extract audio into the same'
                     u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                     u' template'.format(outtmpl))

    # Any "print this field" option implies quiet mode and skips the download
    any_printing = (
        opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail
        or opts.getdescription or opts.getfilename or opts.getformat
        or opts.getduration or opts.dumpjson)
    download_archive_fn = (
        os.path.expanduser(opts.download_archive)
        if opts.download_archive is not None
        else opts.download_archive)

    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or any_printing),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'autonumber_size': opts.autonumber_size,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'progress_with_newline': opts.progress_with_newline,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'noplaylist': opts.noplaylist,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeannotations': opts.writeannotations,
        'writeinfojson': opts.writeinfojson,
        'writethumbnail': opts.writethumbnail,
        'writesubtitles': opts.writesubtitles,
        'writeautomaticsub': opts.writeautomaticsub,
        'allsubtitles': opts.allsubtitles,
        'listsubtitles': opts.listsubtitles,
        'subtitlesformat': opts.subtitlesformat,
        'subtitleslangs': opts.subtitleslangs,
        'matchtitle': decodeOption(opts.matchtitle),
        'rejecttitle': decodeOption(opts.rejecttitle),
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'dump_intermediate_pages': opts.dump_intermediate_pages,
        'write_pages': opts.write_pages,
        'test': opts.test,
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
        'min_views': opts.min_views,
        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': download_archive_fn,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'exec_cmd': opts.exec_cmd,
    }

    with YoutubeDL(ydl_opts) as ydl:
        ydl.print_debug_header()
        ydl.add_default_info_extractors()

        # PostProcessors
        # Add the metadata pp first, the other pps will copy it
        if opts.addmetadata:
            ydl.add_post_processor(FFmpegMetadataPP())
        if opts.extractaudio:
            ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
        if opts.recodevideo:
            ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
        if opts.embedsubtitles:
            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
        if opts.xattrs:
            ydl.add_post_processor(XAttrMetadataPP())
        if opts.embedthumbnail:
            # The audio-fix step is only added when the metadata
            # post-processor was not requested.
            if not opts.addmetadata:
                ydl.add_post_processor(FFmpegAudioFixPP())
            ydl.add_post_processor(AtomicParsleyPP())

        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
        if opts.exec_cmd:
            ydl.add_post_processor(ExecAfterDownloadPP(
                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))

        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose)

        # Remove cache dir
        if opts.rm_cachedir:
            ydl.cache.remove()

        # Maybe do nothing
        if not all_urls and opts.load_info_filename is None:
            # Without URLs, only the maintenance actions above justify the run
            if not (opts.update_self or opts.rm_cachedir):
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()

        try:
            if opts.load_info_filename is not None:
                retcode = ydl.download_with_info_file(opts.load_info_filename)
            else:
                retcode = ydl.download(all_urls)
        except MaxDownloadsReached:
            ydl.to_screen(u'--max-download limit reached, aborting.')
            retcode = 101

    sys.exit(retcode)
427
428
def main(argv=None):
    """Run ``_real_main`` and turn the known failure exceptions into exits.

    argv -- passed through unchanged to ``_real_main``.

    ``DownloadError`` ends the process through ``sys.exit(1)``;
    ``SameFileError`` and ``KeyboardInterrupt`` call ``sys.exit`` with an
    error message instead.  Every other exception propagates to the caller.
    """
    try:
        _real_main(argv)
    except (DownloadError, SameFileError, KeyboardInterrupt) as caught:
        # Tested in the same order in which separate handlers would match
        if isinstance(caught, DownloadError):
            exit_arg = 1
        elif isinstance(caught, SameFileError):
            exit_arg = u'ERROR: fixed output name but more than one file to download'
        else:
            exit_arg = u'\nERROR: Interrupted by user'
        sys.exit(exit_arg)