]> jfr.im git - yt-dlp.git/blob - youtube_dl/__init__.py
moved updating code to update.py
[yt-dlp.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import with_statement
5 from __future__ import absolute_import
6
7 __authors__ = (
8 'Ricardo Garcia Gonzalez',
9 'Danny Colligan',
10 'Benjamin Johnson',
11 'Vasyl\' Vavrychuk',
12 'Witold Baryluk',
13 'Paweł Paprota',
14 'Gergely Imreh',
15 'Rogério Brito',
16 'Philipp Hagemeister',
17 'Sören Schulze',
18 'Kevin Ngo',
19 'Ori Avtalion',
20 'shizeeg',
21 'Filippo Valsorda',
22 'Christian Albrecht',
23 'Dave Vasilevsky',
24 )
25
26 __license__ = 'Public Domain'
27
28 import getpass
29 import optparse
30 import os
31 import re
32 import shlex
33 import socket
34 import subprocess
35 import sys
36 import warnings
37 import platform
38
39 from .utils import *
40 from .update import update_self
41 from .version import __version__
42 from .FileDownloader import *
43 from .InfoExtractors import *
44 from .PostProcessor import *
45
def parseOpts():
    """Build the option parser and parse config files plus the command line.

    Arguments are gathered, in this order, from ``/etc/youtube-dl.conf``, the
    per-user ``youtube-dl.conf`` (looked up under ``$XDG_CONFIG_HOME`` when
    that variable is set, otherwise under ``~/.config``) and finally
    ``sys.argv[1:]``; the concatenated list is handed to optparse.

    Returns a ``(parser, opts, args)`` tuple: the ``optparse.OptionParser``
    itself, the parsed option values and the remaining positional arguments.
    """
    def _readOptions(filename_bytes):
        """Return the shell-split options stored in a file, or [] if it cannot be opened."""
        try:
            optionf = open(filename_bytes)
        except IOError:
            return []  # silently skip if file is not present
        res = []
        # The context manager closes the file even if shlex rejects a line.
        with optionf:
            for line in optionf:
                res += shlex.split(line, comments=True)
        return res

    def _format_option_string(option):
        """Render an option for --help, e.g. ('-o', '--output') -> '-o, --output METAVAR'."""
        opts = []

        if option._short_opts:
            opts.append(option._short_opts[0])
        if option._long_opts:
            opts.append(option._long_opts[0])
        if len(opts) > 1:
            opts.insert(1, ', ')

        if option.takes_value():
            opts.append(' %s' % option.metavar)

        return "".join(opts)

    def _find_term_columns():
        """Return the terminal width in columns, or None when it cannot be determined."""
        columns = os.environ.get('COLUMNS', None)
        if columns:
            try:
                return int(columns)
            except ValueError:
                # A malformed COLUMNS value must not abort the program;
                # fall back to asking stty below.
                pass

        try:
            sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = sp.communicate()
            return int(out.split()[1])
        except (OSError, ValueError, IndexError):
            # stty missing (OSError), empty output (IndexError) or
            # non-numeric output (ValueError): width is simply unknown.
            return None

    max_width = 80
    max_help_position = 80

    # No need to wrap help messages if we're on a wide console
    columns = _find_term_columns()
    if columns:
        max_width = columns

    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
    fmt.format_option_strings = _format_option_string

    kw = {
        'version': __version__,
        'formatter': fmt,
        'usage': '%prog [options] url [url...]',
        # '-h' and '-v' are registered more than once below; 'resolve' lets
        # optparse settle those clashes instead of raising an error.
        'conflict_handler': 'resolve',
    }

    parser = optparse.OptionParser(**kw)

    # option groups
    general = optparse.OptionGroup(parser, 'General Options')
    selection = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

    general.add_option('-h', '--help',
            action='help', help='print this help text and exit')
    general.add_option('-v', '--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
            action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
            dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
            dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10)
    general.add_option('--buffer-size',
            dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024")
    general.add_option('--no-resize-buffer',
            action='store_true', dest='noresizebuffer',
            help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False)
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
    general.add_option('--user-agent',
            dest='user_agent', help='specify a custom user agent', metavar='UA')
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)
    general.add_option('--test', action='store_true', dest='test', default=False, help=optparse.SUPPRESS_HELP)

    selection.add_option('--playlist-start',
            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is %default)', default=1)
    selection.add_option('--playlist-end',
            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
            dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-n', '--netrc',
            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)

    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT', help='video format code')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
    video_format.add_option('--max-quality',
            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
    video_format.add_option('-F', '--list-formats',
            action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
    video_format.add_option('--write-srt',
            action='store_true', dest='writesubtitles',
            help='write video closed captions to a .srt file (currently youtube only)', default=False)
    video_format.add_option('--srt-lang',
            action='store', dest='subtitleslang', metavar='LANG',
            help='language of the closed captions to download (optional) use IETF language tags like \'en\'')

    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
            action='store_true', dest='skip_download', help='do not download the video', default=False)
    verbosity.add_option('-g', '--get-url',
            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
    verbosity.add_option('-e', '--get-title',
            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
    verbosity.add_option('--get-thumbnail',
            action='store_true', dest='getthumbnail',
            help='simulate, quiet but print thumbnail URL', default=False)
    verbosity.add_option('--get-description',
            action='store_true', dest='getdescription',
            help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--get-filename',
            action='store_true', dest='getfilename',
            help='simulate, quiet but print output filename', default=False)
    verbosity.add_option('--get-format',
            action='store_true', dest='getformat',
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('--no-progress',
            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
    verbosity.add_option('--console-title',
            action='store_true', dest='consoletitle',
            help='display progress in console titlebar', default=False)
    # Re-uses '-v', which was already registered for --version above; the
    # parser's 'resolve' conflict handler decides which option keeps it.
    verbosity.add_option('-v', '--verbose',
            action='store_true', dest='verbose', help='print various debugging information', default=False)

    filesystem.add_option('-t', '--title',
            action='store_true', dest='usetitle', help='use title in file name', default=False)
    filesystem.add_option('--id',
            action='store_true', dest='useid', help='use video ID in file name', default=False)
    filesystem.add_option('-l', '--literal',
            action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
    filesystem.add_option('-A', '--auto-number',
            action='store_true', dest='autonumber',
            help='number downloaded files starting from 00000', default=False)
    filesystem.add_option('-o', '--output',
            dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. Can also be used to download to a different directory, for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')
    filesystem.add_option('--restrict-filenames',
            action='store_true', dest='restrictfilenames',
            help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
    filesystem.add_option('-a', '--batch-file',
            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('-w', '--no-overwrites',
            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
            action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
    filesystem.add_option('--no-continue',
            action='store_false', dest='continue_dl',
            help='do not resume partially downloaded files (restart from beginning)')
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option('--no-part',
            action='store_true', dest='nopart', help='do not use .part files', default=False)
    filesystem.add_option('--no-mtime',
            action='store_false', dest='updatetime',
            help='do not use the Last-modified header to set the file modification time', default=True)
    filesystem.add_option('--write-description',
            action='store_true', dest='writedescription',
            help='write video description to a .description file', default=False)
    filesystem.add_option('--write-info-json',
            action='store_true', dest='writeinfojson',
            help='write video metadata to a .info.json file', default=False)

    postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
            help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5',
            help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)')
    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
            help='keeps the video file on disk after the post-processing; the video is erased by default')
    postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False,
            help='do not overwrite post-processed files; the post-processed files are overwritten by default')

    # The order of these calls is the order the groups appear in --help.
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
    parser.add_option_group(authentication)
    parser.add_option_group(postproc)

    xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_config_home:
        userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
    else:
        userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
    # Command-line arguments come last in the combined list, after both
    # configuration files.
    argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
    opts, args = parser.parse_args(argv)

    return parser, opts, args
276
def gen_extractors():
    """Instantiate one extractor of every supported kind, in priority order.

    The position in the returned list is significant: the first extractor
    that matches a URL is the one that handles it, so the catch-all
    GenericIE stays last.
    """
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeChannelIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVUserIE,
        BlipTVIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,
        XNXXIE,
        GooglePlusIE,
        ArteTvIE,
        NBAIE,
        JustinTVIE,
        FunnyOrDieIE,
        TweetReelIE,
        GenericIE,
    )
    return [extractor_class() for extractor_class in extractor_classes]
318
def _real_main():
    """Run the command-line program from option parsing to the final exit.

    Parses the options, prepares the cookie jar and the global URL opener,
    validates and normalises option values, builds the FileDownloader with
    every extractor (and the audio post-processor when requested), downloads
    all URLs given on the command line and in the batch file, saves the
    cookie jar if one was requested and finally calls ``sys.exit`` with the
    downloader's return code (101 when the --max-downloads limit was hit).

    Invalid option combinations are reported through ``parser.error``.
    Exceptions raised by the downloader other than MaxDownloadsReached are
    not handled here and propagate to the caller.
    """
    parser, opts, args = parseOpts()

    # Open appropriate CookieJar
    if opts.cookiefile is None:
        jar = compat_cookiejar.CookieJar()
    else:
        try:
            jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                jar.load()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to open cookie file')

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Dump user agent
    if opts.dump_user_agent:
        print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batchurls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = open(opts.batchfile, 'r')
            try:
                batchurls = batchfd.readlines()
            finally:
                # Release the handle we opened ourselves; stdin is left alone.
                if batchfd is not sys.stdin:
                    batchfd.close()
            batchurls = [x.strip() for x in batchurls]
            # Drop blank lines and lines starting with '#', '/' or ';'.
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = batchurls + args
    all_urls = [url.strip() for url in all_urls]

    # General configuration
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar)
    proxy_handler = compat_urllib_request.ProxyHandler()
    opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(300)  # 5 minutes should be enough (famous last words)

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in extractors:
            print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            # A URL is listed only under the first extractor that accepts it.
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                print(u'  ' + mu)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error(u'using output template conflicts with using title, video ID or auto number')
    if opts.usetitle and opts.useid:
        parser.error(u'using title conflicts with using video ID')
    if opts.username is not None and opts.password is None:
        opts.password = getpass.getpass(u'Type account password and press return:')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = int(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error(u'invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    try:
        opts.playliststart = int(opts.playliststart)
        if opts.playliststart <= 0:
            raise ValueError(u'Playlist start must be positive')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        # -1 is the option's default and means "until the last video".
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
            parser.error(u'invalid audio format specified')
    if opts.audioquality:
        # Accept bitrates such as '128K' by stripping the unit suffix.
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error(u'invalid audio quality specified')

    if sys.version_info < (3,):
        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
        if opts.outtmpl is not None:
            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
    # The first truthy alternative wins; an empty -o value falls through to
    # the generated templates.
    outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
            or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
            or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
            or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
            or (opts.useid and u'%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or u'%(id)s.%(ext)s')

    # Any of the --get-* options implies both quiet mode and skipping the
    # actual download.
    print_only = (opts.geturl or opts.gettitle or opts.getthumbnail
            or opts.getdescription or opts.getfilename or opts.getformat)

    # File downloader
    fd = FileDownloader({
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'quiet': (opts.quiet or print_only),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or print_only),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeinfojson': opts.writeinfojson,
        'writesubtitles': opts.writesubtitles,
        'subtitleslang': opts.subtitleslang,
        'matchtitle': opts.matchtitle,
        'rejecttitle': opts.rejecttitle,
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'test': opts.test,
        })

    if opts.verbose:
        fd.to_screen(u'[debug] youtube-dl version ' + __version__)
        try:
            sp = subprocess.Popen(['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                fd.to_screen(u'[debug] Git HEAD: ' + out)
        except Exception:
            # Purely informational: a missing git binary or a non-repository
            # install must never abort the run. Unlike a bare except, this
            # still lets KeyboardInterrupt and SystemExit through.
            pass
        fd.to_screen(u'[debug] Python version %s - %s' % (platform.python_version(), platform.platform()))
        fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

    for extractor in extractors:
        fd.add_info_extractor(extractor)

    # PostProcessors
    if opts.extractaudio:
        fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo, nopostoverwrites=opts.nopostoverwrites))

    # Update version
    if opts.update_self:
        update_self(fd.to_screen, opts.verbose, sys.argv[0])

    # Maybe do nothing
    if len(all_urls) < 1:
        if not opts.update_self:
            parser.error(u'you must provide at least one URL')
        else:
            sys.exit()

    try:
        retcode = fd.download(all_urls)
    except MaxDownloadsReached:
        fd.to_screen(u'--max-download limit reached, aborting.')
        retcode = 101

    # Dump cookie jar if requested
    if opts.cookiefile is not None:
        try:
            jar.save()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to save cookie jar')

    sys.exit(retcode)
524
def main():
    """Entry point: run _real_main and turn known failures into an exit status.

    A DownloadError exits with status 1; SameFileError and KeyboardInterrupt
    exit with an explanatory message. Anything else, including the
    SystemExit raised by _real_main itself, propagates untouched.
    """
    try:
        _real_main()
    except DownloadError:
        exit_status = 1
    except SameFileError:
        exit_status = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        exit_status = u'\nERROR: Interrupted by user'
    else:
        # _real_main returned without raising: nothing left to report.
        return
    sys.exit(exit_status)