]> jfr.im git - yt-dlp.git/blob - youtube_dl/__init__.py
Merge branch 'master' of github.com:rg3/youtube-dl
[yt-dlp.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
# Author credits for the package. The source file is UTF-8 encoded, so the
# non-ASCII names below are stored exactly as written.
__authors__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    "Vasyl' Vavrychuk",
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
    'Christian Albrecht',
    'Dave Vasilevsky',
    'Jaime Marquínez Ferrándiz',
    'Jeff Crouse',
    'Osama Khalid',
    'Michael Walter',
    'M. Yasoob Ullah Khalid',
    'Julien Fraichard',
    'Johny Mo Swag',
    'Axel Noack',
    'Albert Kim',
    'Pierre Rudloff',
    'Huarong Huo',
    'Ismael Mejía',
    "Steffan 'Ruirize' James",
    'Andras Elso',
    'Jelle van der Waa',
    'Marcin Cieślak',
    'Anton Larionov',
    'Takuya Tsuchida',
    'Sergey M.',
    'Michael Orlitzky',
    'Chris Gahan',
    'Saimadhav Heblikar',
    'Mike Col',
    'Oleg Prutz',
    'pulpe',
    'Andreas Schmitz',
    'Michael Kaiser',
    'Niklas Laxström',
    'David Triendl',
    'Anthony Weems',
    'David Wagner',
    'Juan C. Olivares',
    'Mattias Harrysson',
    'phaer',
    'Sainyam Kapoor',
    'Nicolas Évrard',
    'Jason Normore',
    'Hoje Lee',
    'Adam Thalhammer',
    'Georg Jähnig',
    'Ralf Haring',
    'Koki Takahashi',
    'Ariset Llerena',
    'Adam Malcontenti-Wilson',
    'Tobias Bell',
    'Naglis Jonaitis',
    'Charles Chen',
    'Hassaan Ali',
    'Dobrosław Żybort',
    'David Fabijan',
    'Sebastian Haas',
    'Alexander Kirk',
    'Erik Johnson',
    'Keith Beckman',
    'Ole Ernst',
    'Aaron McDaniel (mcd1992)',
    'Magnus Kolstad',
    'Hari Padmanaban',
    'Carlos Ramos',
    '5moufl',
    'lenaten',
    'Dennis Scheiba',
    'Damon Timm',
    'winwon',
    'Xavier Beynon',
)

# License identifier exposed as module metadata.
__license__ = 'Public Domain'
89
90 import codecs
91 import io
92 import os
93 import random
94 import sys
95
96
97 from .options import (
98 parseOpts,
99 )
100 from .utils import (
101 compat_getpass,
102 compat_print,
103 DateRange,
104 DEFAULT_OUTTMPL,
105 decodeOption,
106 DownloadError,
107 MaxDownloadsReached,
108 preferredencoding,
109 read_batch_urls,
110 SameFileError,
111 setproctitle,
112 std_headers,
113 write_string,
114 )
115 from .update import update_self
116 from .downloader import (
117 FileDownloader,
118 )
119 from .extractor import gen_extractors
120 from .YoutubeDL import YoutubeDL
121 from .postprocessor import (
122 AtomicParsleyPP,
123 FFmpegAudioFixPP,
124 FFmpegMetadataPP,
125 FFmpegVideoConvertor,
126 FFmpegExtractAudioPP,
127 FFmpegEmbedSubtitlePP,
128 XAttrMetadataPP,
129 ExecAfterDownloadPP,
130 )
131
132
def _real_main(argv=None):
    """Run the command line interface for the given arguments.

    The function parses ``argv`` with ``parseOpts``, applies header related
    options to ``std_headers``, collects URLs from the batch file and the
    positional arguments, validates and normalises the remaining options,
    builds the option dictionary for ``YoutubeDL``, registers the requested
    post-processors and finally starts the download.

    argv -- argument list handed to ``parseOpts`` unchanged (may be None).

    The function finishes by calling ``sys.exit`` with the download return
    code (101 when ``MaxDownloadsReached`` was raised).  Informational
    options (dumping the user agent, listing extractors) exit early with
    status 0.  Invalid options are reported through ``parser.error``.

    Raises ValueError when the playlist start is not positive or the
    playlist end lies before the playlist start.
    """
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/rg3/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    setproctitle(u'youtube-dl')

    parser, opts, args = parseOpts(argv)

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Set referer
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

    # Custom HTTP headers
    if opts.headers is not None:
        for h in opts.headers:
            # The colon is searched from index 1 so that an empty key is
            # rejected as well.
            if h.find(':', 1) < 0:
                parser.error(u'wrong header formatting, it should be key:value, not "%s"' % h)
            # Split on the first colon only: header values frequently
            # contain colons themselves (URLs, timestamps), and splitting
            # further would yield more than two parts and break the
            # unpacking below.
            key, value = h.split(':', 1)
            if opts.verbose:
                write_string(u'[debug] Adding header from command line option %s:%s\n' % (key, value))
            std_headers[key] = value

    # Dump user agent
    if opts.dump_user_agent:
        compat_print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batch_urls = []
    if opts.batchfile is not None:
        try:
            # '-' selects standard input instead of a file on disk
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
            batch_urls = read_batch_urls(batchfd)
            if opts.verbose:
                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = batch_urls + args
    all_urls = [url.strip() for url in all_urls]
    # Command line arguments may be byte strings; decode them so that all
    # URLs are text from here on.
    _enc = preferredencoding()
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            # Show which of the given URLs this extractor would handle
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                compat_print(u'  ' + mu)
        sys.exit(0)
    if opts.list_extractor_descriptions:
        # Building blocks for the randomly generated search examples
        _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise', u'sleeping bunny')
        _COUNTS = (u'', u'5', u'10', u'all')
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            # IE_DESC set to False hides the extractor from this listing
            if desc is False:
                continue
            if hasattr(ie, 'SEARCH_KEY'):
                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            compat_print(desc)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing\n')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error(u'using output template conflicts with using title, video ID or auto number')
    if opts.usetitle and opts.useid:
        parser.error(u'using title conflicts with using video ID')
    if opts.username is not None and opts.password is None:
        opts.password = compat_getpass(u'Type account password and press [Return]: ')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.min_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
        if numeric_limit is None:
            parser.error(u'invalid min_filesize specified')
        opts.min_filesize = numeric_limit
    if opts.max_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
        if numeric_limit is None:
            parser.error(u'invalid max_filesize specified')
        opts.max_filesize = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = int(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error(u'invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    # NOTE(review): these two checks raise ValueError while every other
    # validation goes through parser.error; kept as is so that the raised
    # exception type does not change.
    if opts.playliststart <= 0:
        raise ValueError(u'Playlist start must be positive')
    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
        raise ValueError(u'Playlist end must be greater than playlist start')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error(u'invalid audio format specified')
    if opts.audioquality:
        # Accept values such as "128k" by dropping the unit suffix
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error(u'invalid audio quality specified')
    if opts.recodevideo is not None:
        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
            parser.error(u'invalid video recode format specified')
    if opts.date is not None:
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)

    # Do not download videos when there are audio-only formats
    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = 'bestaudio/best'

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    if opts.allsubtitles and (opts.writeautomaticsub == False):
        opts.writesubtitles = True

    if sys.version_info < (3,):
        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
        if opts.outtmpl is not None:
            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
    # The first truthy alternative wins; an explicit template has priority
    # and DEFAULT_OUTTMPL is the fallback.
    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
               or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
               or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
               or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
               or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
               or (opts.useid and u'%(id)s.%(ext)s')
               or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
               or DEFAULT_OUTTMPL)
    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error(u'Cannot download a video and extract audio into the same'
                     u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                     u' template'.format(outtmpl))

    # Any of the "print this field" options implies quiet mode and skips
    # the actual download (see 'quiet' and 'skip_download' below).
    any_printing = (
        opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail
        or opts.getdescription or opts.getfilename or opts.getformat
        or opts.getduration or opts.dumpjson or opts.dump_single_json)
    download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive

    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson,
        'dump_single_json': opts.dump_single_json,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or any_printing),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'autonumber_size': opts.autonumber_size,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'progress_with_newline': opts.progress_with_newline,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'noplaylist': opts.noplaylist,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeannotations': opts.writeannotations,
        'writeinfojson': opts.writeinfojson,
        'writethumbnail': opts.writethumbnail,
        'writesubtitles': opts.writesubtitles,
        'writeautomaticsub': opts.writeautomaticsub,
        'allsubtitles': opts.allsubtitles,
        'listsubtitles': opts.listsubtitles,
        'subtitlesformat': opts.subtitlesformat,
        'subtitleslangs': opts.subtitleslangs,
        'matchtitle': decodeOption(opts.matchtitle),
        'rejecttitle': decodeOption(opts.rejecttitle),
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'dump_intermediate_pages': opts.dump_intermediate_pages,
        'write_pages': opts.write_pages,
        'test': opts.test,
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
        'min_views': opts.min_views,
        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': download_archive_fn,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'exec_cmd': opts.exec_cmd,
        'extract_flat': opts.extract_flat,
    }

    with YoutubeDL(ydl_opts) as ydl:
        ydl.print_debug_header()
        ydl.add_default_info_extractors()

        # PostProcessors
        # Add the metadata pp first, the other pps will copy it
        if opts.addmetadata:
            ydl.add_post_processor(FFmpegMetadataPP())
        if opts.extractaudio:
            ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
        if opts.recodevideo:
            ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
        if opts.embedsubtitles:
            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
        if opts.xattrs:
            ydl.add_post_processor(XAttrMetadataPP())
        if opts.embedthumbnail:
            if not opts.addmetadata:
                ydl.add_post_processor(FFmpegAudioFixPP())
            ydl.add_post_processor(AtomicParsleyPP())

        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
        if opts.exec_cmd:
            ydl.add_post_processor(ExecAfterDownloadPP(
                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))

        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose)

        # Remove cache dir
        if opts.rm_cachedir:
            ydl.cache.remove()

        # Maybe do nothing
        if not all_urls and (opts.load_info_filename is None):
            # Updating or clearing the cache are complete tasks on their
            # own; anything else needs at least one URL.
            if not (opts.update_self or opts.rm_cachedir):
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()

        try:
            if opts.load_info_filename is not None:
                retcode = ydl.download_with_info_file(opts.load_info_filename)
            else:
                retcode = ydl.download(all_urls)
        except MaxDownloadsReached:
            ydl.to_screen(u'--max-download limit reached, aborting.')
            retcode = 101

    sys.exit(retcode)
431
432
def main(argv=None):
    """Entry point wrapper that maps known failures to an exit status.

    Calls ``_real_main(argv)`` and converts ``DownloadError``,
    ``SameFileError`` and ``KeyboardInterrupt`` into a ``sys.exit`` call:
    status 1 for a download error, an error message for the other two.
    Any other exception propagates unchanged.
    """
    try:
        _real_main(argv)
    except DownloadError:
        exit_status = 1
    except SameFileError:
        exit_status = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        exit_status = u'\nERROR: Interrupted by user'
    else:
        # Nothing went wrong and _real_main returned: nothing to report.
        return
    sys.exit(exit_status)