jfr.im git - yt-dlp.git/blob - youtube_dl/__init__.py
credit @jaimeMF
[yt-dlp.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import with_statement
5 from __future__ import absolute_import
6
# People credited as authors of this package.
__authors__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    'Vasyl\' Vavrychuk',
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
    'Christian Albrecht',
    'Dave Vasilevsky',
    'Jaime Marquínez Ferrándiz',
    )

# License under which this module is distributed.
__license__ = 'Public Domain'
28
29 import getpass
30 import optparse
31 import os
32 import re
33 import shlex
34 import socket
35 import subprocess
36 import sys
37 import warnings
38 import platform
39
40 from .utils import *
41 from .update import update_self
42 from .version import __version__
43 from .FileDownloader import *
44 from .InfoExtractors import *
45 from .PostProcessor import *
46
def parseOpts():
    """Build the option parser and parse config files plus the command line.

    Arguments are gathered, in this order, from /etc/youtube-dl.conf, the
    per-user configuration file ($XDG_CONFIG_HOME/youtube-dl.conf, or
    ~/.config/youtube-dl.conf when XDG_CONFIG_HOME is unset) and
    sys.argv[1:], then parsed as one argument list.

    Returns a (parser, opts, args) tuple as produced by optparse.
    """
    def _readOptions(filename_bytes):
        """Return the shell-style tokens read from a configuration file.

        A file that cannot be opened yields an empty list; '#' comments
        inside the file are ignored.
        """
        try:
            optionf = open(filename_bytes)
        except IOError:
            return []  # silently skip if file is not present
        with optionf:
            res = []
            for l in optionf:
                res += shlex.split(l, comments=True)
        return res

    def _format_option_string(option):
        """('-o', '--option') -> '-o, --option METAVAR'"""
        opts = []

        if option._short_opts:
            opts.append(option._short_opts[0])
        if option._long_opts:
            opts.append(option._long_opts[0])
        if len(opts) > 1:
            opts.insert(1, ', ')

        if option.takes_value():
            opts.append(' %s' % option.metavar)

        return "".join(opts)

    def _find_term_columns():
        """Return the terminal width in columns, or None if it is unknown."""
        columns = os.environ.get('COLUMNS', None)
        if columns:
            try:
                return int(columns)
            except ValueError:
                # Malformed COLUMNS must not abort startup; ask the terminal instead.
                pass

        try:
            sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = sp.communicate()
            return int(out.split()[1])
        except (EnvironmentError, ValueError, IndexError):
            # stty is missing, or printed nothing usable (e.g. not a terminal).
            pass
        return None

    max_width = 80
    max_help_position = 80

    # No need to wrap help messages if we're on a wide console
    columns = _find_term_columns()
    if columns:
        max_width = columns

    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
    fmt.format_option_strings = _format_option_string

    kw = {
        'version': __version__,
        'formatter': fmt,
        'usage': '%prog [options] url [url...]',
        # 'resolve' lets a later option take over a flag from an earlier one
        # (see '-v' below) instead of raising an error.
        'conflict_handler': 'resolve',
    }

    parser = optparse.OptionParser(**kw)

    # option groups
    general = optparse.OptionGroup(parser, 'General Options')
    selection = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

    general.add_option('-h', '--help',
        action='help', help='print this help text and exit')
    # '-v' is also claimed by --verbose further down; with the 'resolve'
    # conflict handler the later definition wins the short flag.
    general.add_option('-v', '--version',
        action='version', help='print program version and exit')
    general.add_option('-U', '--update',
        action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
        action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
        dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
        dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
    general.add_option('--buffer-size',
        dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
    general.add_option('--no-resize-buffer',
        action='store_true', dest='noresizebuffer',
        help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
    general.add_option('--dump-user-agent',
        action='store_true', dest='dump_user_agent',
        help='display the current browser identification', default=False)
    general.add_option('--user-agent',
        dest='user_agent', help='specify a custom user agent', metavar='UA')
    general.add_option('--list-extractors',
        action='store_true', dest='list_extractors',
        help='List all supported extractors and the URLs they would handle', default=False)
    general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)

    selection.add_option('--playlist-start',
        dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
    selection.add_option('--playlist-end',
        dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX', help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX', help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)

    authentication.add_option('-u', '--username',
        dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
        dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-n', '--netrc',
        action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)

    video_format.add_option('-f', '--format',
        action='store', dest='format', metavar='FORMAT', help='video format code')
    video_format.add_option('--all-formats',
        action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
        action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
    video_format.add_option('--max-quality',
        action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
    video_format.add_option('-F', '--list-formats',
        action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
    video_format.add_option('--write-srt',
        action='store_true', dest='writesubtitles',
        help='write video closed captions to a .srt file (currently youtube only)', default=False)
    video_format.add_option('--srt-lang',
        action='store', dest='subtitleslang', metavar='LANG',
        help='language of the closed captions to download (optional) use IETF language tags like \'en\'')

    verbosity.add_option('-q', '--quiet',
        action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
        action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
        action='store_true', dest='skip_download', help='do not download the video', default=False)
    verbosity.add_option('-g', '--get-url',
        action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
    verbosity.add_option('-e', '--get-title',
        action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
    verbosity.add_option('--get-thumbnail',
        action='store_true', dest='getthumbnail',
        help='simulate, quiet but print thumbnail URL', default=False)
    verbosity.add_option('--get-description',
        action='store_true', dest='getdescription',
        help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--get-filename',
        action='store_true', dest='getfilename',
        help='simulate, quiet but print output filename', default=False)
    verbosity.add_option('--get-format',
        action='store_true', dest='getformat',
        help='simulate, quiet but print output format', default=False)
    verbosity.add_option('--no-progress',
        action='store_true', dest='noprogress', help='do not print progress bar', default=False)
    verbosity.add_option('--console-title',
        action='store_true', dest='consoletitle',
        help='display progress in console titlebar', default=False)
    verbosity.add_option('-v', '--verbose',
        action='store_true', dest='verbose', help='print various debugging information', default=False)

    filesystem.add_option('-t', '--title',
        action='store_true', dest='usetitle', help='use title in file name', default=False)
    filesystem.add_option('--id',
        action='store_true', dest='useid', help='use video ID in file name', default=False)
    filesystem.add_option('-l', '--literal',
        action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
    filesystem.add_option('-A', '--auto-number',
        action='store_true', dest='autonumber',
        help='number downloaded files starting from 00000', default=False)
    filesystem.add_option('-o', '--output',
        dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')
    filesystem.add_option('--restrict-filenames',
        action='store_true', dest='restrictfilenames',
        help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
    filesystem.add_option('-a', '--batch-file',
        dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('-w', '--no-overwrites',
        action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
        action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
    filesystem.add_option('--no-continue',
        action='store_false', dest='continue_dl',
        help='do not resume partially downloaded files (restart from beginning)')
    filesystem.add_option('--cookies',
        dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option('--no-part',
        action='store_true', dest='nopart', help='do not use .part files', default=False)
    filesystem.add_option('--no-mtime',
        action='store_false', dest='updatetime',
        help='do not use the Last-modified header to set the file modification time', default=True)
    filesystem.add_option('--write-description',
        action='store_true', dest='writedescription',
        help='write video description to a .description file', default=False)
    filesystem.add_option('--write-info-json',
        action='store_true', dest='writeinfojson',
        help='write video metadata to a .info.json file', default=False)

    postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
        help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
        help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
        help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
        help='keeps the video file on disk after the post-processing; the video is erased by default')
    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
        help='do not overwrite post-processed files; the post-processed files are overwritten by default')

    # The order of these calls is the order in which the groups are registered
    # with the parser.
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
    parser.add_option_group(authentication)
    parser.add_option_group(postproc)

    xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_config_home:
        userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
    else:
        userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
    # System-wide settings first, then per-user settings, then the real command line.
    argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
    opts, args = parser.parse_args(argv)

    return parser, opts, args
277
def gen_extractors():
    """Return a list holding one fresh instance of every supported extractor.

    The order is significant: the first extractor that matches a URL is the
    one that handles it.
    """
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeChannelIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVUserIE,
        BlipTVIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,
        XNXXIE,
        GooglePlusIE,
        ArteTvIE,
        NBAIE,
        JustinTVIE,
        FunnyOrDieIE,
        TweetReelIE,
        SteamIE,
        UstreamIE,
        GenericIE,
    )
    # Instantiate in declaration order so matching priority is preserved.
    return [extractor_class() for extractor_class in extractor_classes]
321
def _real_main():
    """Parse the options, configure a FileDownloader and download every URL.

    URLs come from the optional batch file followed by the positional
    arguments. The function does not return normally: it ends through
    sys.exit() (with the downloader's return code on the regular path) or
    through parser.error() when the options are invalid.
    """
    parser, opts, args = parseOpts()

    # Open appropriate CookieJar
    if opts.cookiefile is None:
        jar = compat_cookiejar.CookieJar()
    else:
        try:
            jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                jar.load()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to open cookie file')

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Dump user agent
    if opts.dump_user_agent:
        print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batchurls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = open(opts.batchfile, 'r')
            try:
                batchurls = batchfd.readlines()
            finally:
                # Release the handle we opened; stdin is not ours to close.
                if batchfd is not sys.stdin:
                    batchfd.close()
            batchurls = [x.strip() for x in batchurls]
            # Drop blank lines and lines starting with '#', '/' or ';'.
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = batchurls + args
    all_urls = [url.strip() for url in all_urls]

    # General configuration
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
    proxy_handler = compat_urllib_request.ProxyHandler()
    opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(300)  # 5 minutes should be enough (famous last words)

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in extractors:
            print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            # A URL is listed only under the first extractor that accepts it.
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                print(u'  ' + mu)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error(u'using output template conflicts with using title, video ID or auto number')
    if opts.usetitle and opts.useid:
        parser.error(u'using title conflicts with using video ID')
    if opts.username is not None and opts.password is None:
        opts.password = getpass.getpass(u'Type account password and press return:')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = int(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error(u'invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    try:
        opts.playliststart = int(opts.playliststart)
        if opts.playliststart <= 0:
            raise ValueError(u'Playlist start must be positive')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        # -1 is the option's default and skips the range check.
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
            parser.error(u'invalid audio format specified')
    if opts.audioquality:
        # Accept values such as '128K' by dropping the unit before validating.
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error(u'invalid audio quality specified')

    if sys.version_info < (3,):
        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
        if opts.outtmpl is not None:
            opts.outtmpl = opts.outtmpl.decode(preferredencoding())

    # Pick the output template: an explicit -o value wins, otherwise derive
    # one from the format/title/id/autonumber flags (first match applies).
    if opts.outtmpl:
        outtmpl = opts.outtmpl
    elif opts.format == '-1' and opts.usetitle:
        outtmpl = u'%(title)s-%(id)s-%(format)s.%(ext)s'
    elif opts.format == '-1':
        outtmpl = u'%(id)s-%(format)s.%(ext)s'
    elif opts.usetitle and opts.autonumber:
        outtmpl = u'%(autonumber)s-%(title)s-%(id)s.%(ext)s'
    elif opts.usetitle:
        outtmpl = u'%(title)s-%(id)s.%(ext)s'
    elif opts.useid:
        outtmpl = u'%(id)s.%(ext)s'
    elif opts.autonumber:
        outtmpl = u'%(autonumber)s-%(id)s.%(ext)s'
    else:
        outtmpl = u'%(id)s.%(ext)s'

    # File downloader
    fd = FileDownloader({
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        # Any of the --get-* options implies quiet mode.
        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'simulate': opts.simulate,
        # --simulate and any of the --get-* options also imply skipping the download.
        'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeinfojson': opts.writeinfojson,
        'writesubtitles': opts.writesubtitles,
        'subtitleslang': opts.subtitleslang,
        'matchtitle': opts.matchtitle,
        'rejecttitle': opts.rejecttitle,
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'test': opts.test,
        })

    if opts.verbose:
        fd.to_screen(u'[debug] youtube-dl version ' + __version__)
        try:
            sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                  cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                fd.to_screen(u'[debug] Git HEAD: ' + out)
        except Exception:
            # Best-effort debug info only (git may be missing); still let
            # KeyboardInterrupt/SystemExit propagate.
            pass
        fd.to_screen(u'[debug] Python version %s - %s' % (platform.python_version(), platform.platform()))
        fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

    for extractor in extractors:
        fd.add_info_extractor(extractor)

    # PostProcessors
    if opts.extractaudio:
        fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo, nopostoverwrites=opts.nopostoverwrites))

    # Update version
    if opts.update_self:
        update_self(fd.to_screen, opts.verbose, sys.argv[0])

    # Maybe do nothing
    if len(all_urls) < 1:
        if not opts.update_self:
            parser.error(u'you must provide at least one URL')
        else:
            sys.exit()

    try:
        retcode = fd.download(all_urls)
    except MaxDownloadsReached:
        fd.to_screen(u'--max-download limit reached, aborting.')
        retcode = 101

    # Dump cookie jar if requested
    if opts.cookiefile is not None:
        try:
            jar.save()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to save cookie jar')

    sys.exit(retcode)
527
def main():
    """Entry point: run _real_main() and turn known failures into exit statuses.

    A DownloadError exits with status 1; SameFileError and a user interrupt
    exit with an explanatory message.
    """
    try:
        _real_main()
        return
    except DownloadError:
        exit_status = 1
    except SameFileError:
        exit_status = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        exit_status = u'\nERROR: Interrupted by user'
    sys.exit(exit_status)