]> jfr.im git - yt-dlp.git/blob - youtube_dl/__init__.py
Merge pull request #405 from hdclark/master
[yt-dlp.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
# Names of the people credited as authors of this program.
__authors__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    'Vasyl\' Vavrychuk',
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
)

__license__ = 'Public Domain'
# Compared by updateSelf() against the text served at UPDATE_URL_VERSION.
__version__ = '2012.09.27'

# Resources fetched by updateSelf():
# UPDATE_URL is downloaded over the running script when it is not frozen,
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
# UPDATE_URL_VERSION holds the version string of the latest release,
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
# UPDATE_URL_EXE is downloaded instead when running as a frozen (py2exe) executable.
UPDATE_URL_EXE = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl.exe'
27
28
29 import cookielib
30 import getpass
31 import optparse
32 import os
33 import re
34 import shlex
35 import socket
36 import subprocess
37 import sys
38 import urllib2
39 import warnings
40
41 from utils import *
42 from FileDownloader import *
43 from InfoExtractors import *
44 from PostProcessor import *
45
def updateSelf(downloader, filename):
    """Update the program file with the latest version from the repository.

    downloader -- only used for reporting, through its to_screen() method.
    filename   -- path of the program file to replace (the script itself, or
                  the executable when running frozen under py2exe).

    Returns None when the program is already up to date or after a
    successful update.  Every failure (target not writable, version check,
    download or write error) terminates the program through sys.exit() with
    an error message.
    """
    if not os.access(filename, os.W_OK):
        sys.exit('ERROR: no write permissions on %s' % filename)

    downloader.to_screen(u'Updating to latest version...')

    # Fetch the latest version string.  The handle is closed on every path
    # (including the "already up to date" early return below), and network
    # failures are reported the same way as for the downloads further down
    # (urllib2.URLError derives from IOError).
    try:
        urlv = urllib2.urlopen(UPDATE_URL_VERSION)
        try:
            newversion = urlv.read().strip()
        finally:
            urlv.close()
    except (IOError, OSError):
        sys.exit('ERROR: unable to check for the latest version')

    if newversion == __version__:
        downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
        return

    if hasattr(sys, "frozen"):  # py2exe
        exe = os.path.abspath(filename)
        directory = os.path.dirname(exe)
        if not os.access(directory, os.W_OK):
            sys.exit('ERROR: no write permissions on %s' % directory)

        # Download next to the running executable; the batch file written
        # below moves the new file into place afterwards.
        try:
            urlh = urllib2.urlopen(UPDATE_URL_EXE)
            try:
                newcontent = urlh.read()
            finally:
                urlh.close()
            with open(exe + '.new', 'wb') as outf:
                outf.write(newcontent)
        except (IOError, OSError):
            sys.exit('ERROR: unable to download latest version')

        try:
            bat = os.path.join(directory, 'youtube-dl-updater.bat')
            # The script waits (via ping), replaces the executable with the
            # downloaded copy and finally deletes itself.
            # NOTE(review): the ping delay presumably gives this process time
            # to exit before the move -- confirm.
            script = """
echo Updating youtube-dl...
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s"
del "%s"
""" % (exe, exe, bat)
            with open(bat, 'w') as b:
                # Trailing newline kept: the previous "print >> b" added one.
                b.write(script + '\n')

            os.startfile(bat)
        except (IOError, OSError):
            sys.exit('ERROR: unable to overwrite current version')

    else:
        try:
            urlh = urllib2.urlopen(UPDATE_URL)
            try:
                newcontent = urlh.read()
            finally:
                urlh.close()
        except (IOError, OSError):
            sys.exit('ERROR: unable to download latest version')

        try:
            with open(filename, 'wb') as outf:
                outf.write(newcontent)
        except (IOError, OSError):
            sys.exit('ERROR: unable to overwrite current version')

    downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
109
def parseOpts():
    """Build the option parser and parse every source of arguments.

    The argument list handed to optparse is the concatenation of, in this
    order: the words read from /etc/youtube-dl.conf, the words read from the
    per-user youtube-dl.conf (under $XDG_CONFIG_HOME when set, otherwise
    under ~/.config) and sys.argv[1:].

    Returns a (parser, opts, args) tuple: the OptionParser itself (callers
    use parser.error()), the parsed option values and the remaining
    positional arguments.
    """
    def _readOptions(filename_bytes):
        """Return the shell-style words of a config file, [] if it cannot be opened."""
        try:
            optionf = open(filename_bytes)
        except IOError:
            return []  # silently skip if file is not present
        try:
            res = []
            for l in optionf:
                # comments=True makes shlex drop '#' comments in the file
                res += shlex.split(l, comments=True)
        finally:
            optionf.close()
        return res

    def _format_option_string(option):
        """Render an option for the help text, e.g. '-o, --output TEMPLATE'.

        Only the first short and the first long spelling are shown.  Options
        that take a value but declare no metavar fall back to the upper-cased
        dest, like optparse's stock formatter, instead of printing 'None'.
        """
        opts = []

        if option._short_opts:
            opts.append(option._short_opts[0])
        if option._long_opts:
            opts.append(option._long_opts[0])
        if len(opts) > 1:
            opts.insert(1, ', ')

        if option.takes_value():
            opts.append(' %s' % (option.metavar or option.dest.upper()))

        return "".join(opts)

    def _find_term_columns():
        """Return the terminal width in columns, or None when it is unknown."""
        columns = os.environ.get('COLUMNS', None)
        if columns:
            try:
                return int(columns)
            except ValueError:
                pass  # unusable COLUMNS value: ask the terminal instead

        try:
            sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = sp.communicate()
            return int(out.split()[1])
        except (EnvironmentError, ValueError, IndexError):
            # stty missing or failing, or its output not "rows columns"
            return None

    max_width = 80
    max_help_position = 80

    # No need to wrap help messages if we're on a wide console
    columns = _find_term_columns()
    if columns:
        max_width = columns

    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
    fmt.format_option_strings = _format_option_string

    kw = {
        'version': __version__,
        'formatter': fmt,
        'usage': '%prog [options] url [url...]',
        # 'resolve' lets the explicit --help/--version declarations below
        # replace the ones optparse creates on its own.
        'conflict_handler': 'resolve',
    }

    parser = optparse.OptionParser(**kw)

    # option groups
    general = optparse.OptionGroup(parser, 'General Options')
    selection = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

    general.add_option('-h', '--help',
            action='help', help='print this help text and exit')
    # No '-v' here: that short option belongs to --verbose below, which took
    # it over through the 'resolve' conflict handler anyway.
    general.add_option('--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
            action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
            dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
            dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
    general.add_option('--user-agent',
            action='store', dest='useragent', help='specify a custom user agent')
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)

    selection.add_option('--playlist-start',
            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
    selection.add_option('--playlist-end',
            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX', help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX', help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
            dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-n', '--netrc',
            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)

    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT', help='video format code')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
    video_format.add_option('--max-quality',
            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
    video_format.add_option('-F', '--list-formats',
            action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
    video_format.add_option('--write-srt',
            action='store_true', dest='writesubtitles',
            help='write video closed captions to a .srt file (currently youtube only)', default=False)
    video_format.add_option('--srt-lang',
            action='store', dest='subtitleslang', metavar='LANG',
            help='language of the closed captions to download (optional) use IETF language tags like \'en\'')

    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
            action='store_true', dest='skip_download', help='do not download the video', default=False)
    verbosity.add_option('-g', '--get-url',
            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
    verbosity.add_option('-e', '--get-title',
            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
    verbosity.add_option('--get-thumbnail',
            action='store_true', dest='getthumbnail',
            help='simulate, quiet but print thumbnail URL', default=False)
    verbosity.add_option('--get-description',
            action='store_true', dest='getdescription',
            help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--get-filename',
            action='store_true', dest='getfilename',
            help='simulate, quiet but print output filename', default=False)
    verbosity.add_option('--get-format',
            action='store_true', dest='getformat',
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('--no-progress',
            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
    verbosity.add_option('--console-title',
            action='store_true', dest='consoletitle',
            help='display progress in console titlebar', default=False)
    verbosity.add_option('-v', '--verbose',
            action='store_true', dest='verbose', help='print various debugging information', default=False)

    filesystem.add_option('-t', '--title',
            action='store_true', dest='usetitle', help='use title in file name', default=False)
    filesystem.add_option('-l', '--literal',
            action='store_true', dest='useliteral', help='use literal title in file name', default=False)
    filesystem.add_option('-A', '--auto-number',
            action='store_true', dest='autonumber',
            help='number downloaded files starting from 00000', default=False)
    filesystem.add_option('-o', '--output',
            dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
    filesystem.add_option('-a', '--batch-file',
            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('-w', '--no-overwrites',
            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
            action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
    filesystem.add_option('--no-continue',
            action='store_false', dest='continue_dl',
            help='do not resume partially downloaded files (restart from beginning)')
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option('--no-part',
            action='store_true', dest='nopart', help='do not use .part files', default=False)
    filesystem.add_option('--no-mtime',
            action='store_false', dest='updatetime',
            help='do not use the Last-modified header to set the file modification time', default=True)
    filesystem.add_option('--write-description',
            action='store_true', dest='writedescription',
            help='write video description to a .description file', default=False)
    filesystem.add_option('--write-info-json',
            action='store_true', dest='writeinfojson',
            help='write video metadata to a .info.json file', default=False)

    postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
            help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
            help='ffmpeg/avconv audio bitrate specification, 128k by default')
    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
            help='keeps the video file on disk after the post-processing; the video is erased by default')

    # The order of these calls is the order of the groups in the help text.
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
    parser.add_option_group(authentication)
    parser.add_option_group(postproc)

    xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_config_home:
        userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
    else:
        userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
    # System-wide options first, then per-user options, then the command line.
    argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
    opts, args = parser.parse_args(argv)

    return parser, opts, args
324
def gen_extractors():
    """Instantiate every supported extractor and return the instances as a list.

    List order is significant: the first extractor that matches a URL is the
    one that handles it, which is why GenericIE comes last.
    """
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVUserIE,
        BlipTVIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,
        XNXXIE,

        GenericIE,
    )
    return [ie_class() for ie_class in extractor_classes]
361
def _real_main():
    """Run the program: parse options, set up urllib2, download all URLs.

    Steps, in order: load the cookie jar, apply/dump the user agent, collect
    URLs from the batch file and the command line, install the global urllib2
    opener, validate the options, build the FileDownloader with its
    extractors and post-processors, optionally self-update, download, and
    save the cookie jar.

    The function does not return normally: it ends in sys.exit() with the
    downloader's return code (101 when the --max-downloads limit was hit), or
    earlier through sys.exit()/parser.error() on invalid input.  Exceptions
    raised by fd.download() other than MaxDownloadsReached propagate to the
    caller.
    """
    parser, opts, args = parseOpts()

    # Open appropriate CookieJar
    if opts.cookiefile is None:
        jar = cookielib.CookieJar()
    else:
        try:
            jar = cookielib.MozillaCookieJar(opts.cookiefile)
            # A missing or unreadable file is fine: the jar then starts empty.
            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                jar.load()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to open cookie file')
    # Set user agent
    if opts.useragent is not None:
        std_headers['User-Agent'] = opts.useragent

    # Dump user agent
    if opts.dump_user_agent:
        print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batchurls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = open(opts.batchfile, 'r')
            try:
                batchurls = batchfd.readlines()
            finally:
                # Close the file we opened ourselves; leave stdin alone.
                if batchfd is not sys.stdin:
                    batchfd.close()
            batchurls = [x.strip() for x in batchurls]
            # Drop blank lines and lines starting with '#', '/' or ';'
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = batchurls + args
    all_urls = [url.strip() for url in all_urls]

    # General configuration
    cookie_processor = urllib2.HTTPCookieProcessor(jar)
    proxy_handler = urllib2.ProxyHandler()
    opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
    urllib2.install_opener(opener)
    socket.setdefaulttimeout(300)  # 5 minutes should be enough (famous last words)

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in extractors:
            print(ie.IE_NAME)
            # Each URL is listed under the first extractor that accepts it
            # and then removed so later extractors do not list it again.
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                print(u' ' + mu)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing')
    if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
        parser.error(u'using output template conflicts with using title, literal title or auto number')
    if opts.usetitle and opts.useliteral:
        parser.error(u'using title conflicts with using literal title')
    if opts.username is not None and opts.password is None:
        opts.password = getpass.getpass(u'Type account password and press return:')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = long(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    try:
        opts.playliststart = int(opts.playliststart)
        if opts.playliststart <= 0:
            raise ValueError(u'Playlist start must be positive')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        # -1 is the option's default and means "up to the last video"
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
            parser.error(u'invalid audio format specified')

    # File downloader
    fd = FileDownloader({
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        # every --get-* option implies quiet mode and no actual download
        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        # The first truthy alternative wins: an explicit template, then the
        # templates derived from the format/title/literal/auto-number flags,
        # and finally the plain "<id>.<ext>" default.
        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
            or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
            or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
            or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or u'%(id)s.%(ext)s'),
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeinfojson': opts.writeinfojson,
        'writesubtitles': opts.writesubtitles,
        'subtitleslang': opts.subtitleslang,
        'matchtitle': opts.matchtitle,
        'rejecttitle': opts.rejecttitle,
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        })

    if opts.verbose:
        fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

    for extractor in extractors:
        fd.add_info_extractor(extractor)

    # PostProcessors
    if opts.extractaudio:
        fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))

    # Update version
    if opts.update_self:
        updateSelf(fd, sys.argv[0])

    # Maybe do nothing
    if len(all_urls) < 1:
        if not opts.update_self:
            parser.error(u'you must provide at least one URL')
        else:
            sys.exit()

    try:
        retcode = fd.download(all_urls)
    except MaxDownloadsReached:
        fd.to_screen(u'--max-download limit reached, aborting.')
        retcode = 101

    # Dump cookie jar if requested
    if opts.cookiefile is not None:
        try:
            jar.save()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to save cookie jar')

    sys.exit(retcode)
540
def main():
    """Entry point: run _real_main() and map known failures to an exit status.

    DownloadError exits with status 1; SameFileError and KeyboardInterrupt
    exit with an explanatory message.  Any other exception propagates.
    """
    exit_status = None
    try:
        _real_main()
    except DownloadError:
        exit_status = 1
    except SameFileError:
        exit_status = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        exit_status = u'\nERROR: Interrupted by user'

    # Still None only when _real_main() returned without raising.
    if exit_status is not None:
        sys.exit(exit_status)