jfr.im git - yt-dlp.git/blob - youtube_dl/__init__.py
Split code as a package, compiled into an executable zip
[yt-dlp.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
# People credited as authors of this program.
__authors__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    "Vasyl' Vavrychuk",
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
)

__license__ = 'Public Domain'

# Reported by the --version option and compared by updateSelf() against the
# version string found in the freshly downloaded copy.
__version__ = '2012.02.27'

# Location updateSelf() downloads the replacement program file from.
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
25
26
27 import cookielib
28 import getpass
29 import optparse
30 import os
31 import re
32 import shlex
33 import socket
34 import subprocess
35 import sys
36 import urllib2
37 import warnings
38
39 from Utils import *
40 from FileDownloader import *
41 from InfoExtractors import *
42 from PostProcessing import *
43
def updateSelf(downloader, filename):
    """Update the program file with the latest version from the repository.

    downloader -- only its to_screen() method is used, for status messages
    filename   -- path of the program file to overwrite

    Returns without touching the file when the downloaded copy declares the
    same __version__ as the running program.  Terminates the process through
    sys.exit() with an error message when the file is not writable, the
    download fails, or the file cannot be overwritten.
    """
    if not os.access(filename, os.W_OK):
        sys.exit('ERROR: no write permissions on %s' % filename)

    downloader.to_screen(u'Updating to latest version...')

    try:
        # Open the URL *before* entering the try/finally: if urlopen() itself
        # fails there is no handle to close, and closing an unbound name in
        # the finally clause would raise a NameError that replaces the real
        # download error and bypasses the handler below.
        urlh = urllib2.urlopen(UPDATE_URL)
        try:
            newcontent = urlh.read()

            vmatch = re.search("__version__ = '([^']+)'", newcontent)
            if vmatch is not None and vmatch.group(1) == __version__:
                downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
                return
        finally:
            urlh.close()
    except (IOError, OSError):
        sys.exit('ERROR: unable to download latest version')

    try:
        outf = open(filename, 'wb')
        try:
            outf.write(newcontent)
        finally:
            outf.close()
    except (IOError, OSError):
        sys.exit('ERROR: unable to overwrite current version')

    downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
76
def parseOpts():
    """Build the option parser and parse configuration files plus sys.argv.

    Arguments are collected, in this order, from /etc/youtube-dl.conf, the
    per-user youtube-dl.conf (under $XDG_CONFIG_HOME when that variable is
    set, otherwise under ~/.config) and finally the command line.

    Returns a (parser, opts, args) tuple: the optparse.OptionParser, the
    parsed option values and the remaining positional arguments.
    """
    def _readOptions(filename_bytes):
        """Return the shell-style tokens found in a configuration file.

        A file that cannot be opened yields an empty list.  '#' comments
        are stripped by shlex.
        """
        try:
            optionf = open(filename_bytes)
        except IOError:
            return []  # silently skip if file is not present
        try:
            res = []
            for line in optionf:
                res += shlex.split(line, comments=True)
        finally:
            optionf.close()
        return res

    def _format_option_string(option):
        """ ('-o', '--option') -> -o, --option METAVAR """
        opts = []

        if option._short_opts:
            opts.append(option._short_opts[0])
        if option._long_opts:
            opts.append(option._long_opts[0])
        if len(opts) > 1:
            opts.insert(1, ', ')

        if option.takes_value():
            opts.append(' %s' % option.metavar)

        return "".join(opts)

    def _find_term_columns():
        """Return the terminal width in columns, or None when unknown."""
        columns = os.environ.get('COLUMNS', None)
        if columns:
            try:
                return int(columns)
            except ValueError:
                # A malformed COLUMNS value must not abort start-up;
                # fall through and ask the terminal instead.
                pass

        try:
            sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = sp.communicate()
            return int(out.split()[1])
        except Exception:
            # Best effort only (stty may be missing or print nothing), but
            # do not swallow KeyboardInterrupt/SystemExit like a bare except.
            pass
        return None

    max_width = 80
    max_help_position = 80

    # No need to wrap help messages if we're on a wide console
    columns = _find_term_columns()
    if columns:
        max_width = columns

    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
    fmt.format_option_strings = _format_option_string

    kw = {
        'version': __version__,
        'formatter': fmt,
        'usage': '%prog [options] url [url...]',
        'conflict_handler': 'resolve',
    }

    parser = optparse.OptionParser(**kw)

    # option groups
    general = optparse.OptionGroup(parser, 'General Options')
    selection = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

    general.add_option('-h', '--help',
            action='help', help='print this help text and exit')
    general.add_option('-v', '--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
            action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
            dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
            dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)

    selection.add_option('--playlist-start',
            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
    selection.add_option('--playlist-end',
            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX', help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX', help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
            dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-n', '--netrc',
            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)

    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT', help='video format code')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
    video_format.add_option('--max-quality',
            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
    video_format.add_option('-F', '--list-formats',
            action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
    video_format.add_option('--write-srt',
            action='store_true', dest='writesubtitles',
            help='write video closed captions to a .srt file (currently youtube only)', default=False)
    video_format.add_option('--srt-lang',
            action='store', dest='subtitleslang', metavar='LANG',
            help='language of the closed captions to download (optional) use IETF language tags like \'en\'')

    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
            action='store_true', dest='skip_download', help='do not download the video', default=False)
    verbosity.add_option('-g', '--get-url',
            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
    verbosity.add_option('-e', '--get-title',
            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
    verbosity.add_option('--get-thumbnail',
            action='store_true', dest='getthumbnail',
            help='simulate, quiet but print thumbnail URL', default=False)
    verbosity.add_option('--get-description',
            action='store_true', dest='getdescription',
            help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--get-filename',
            action='store_true', dest='getfilename',
            help='simulate, quiet but print output filename', default=False)
    verbosity.add_option('--get-format',
            action='store_true', dest='getformat',
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('--no-progress',
            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
    verbosity.add_option('--console-title',
            action='store_true', dest='consoletitle',
            help='display progress in console titlebar', default=False)
    # '-v' was already registered for --version above; because the parser
    # uses conflict_handler='resolve', this later registration takes the
    # short flag over and --version stays reachable by its long name only.
    verbosity.add_option('-v', '--verbose',
            action='store_true', dest='verbose', help='print various debugging information', default=False)

    filesystem.add_option('-t', '--title',
            action='store_true', dest='usetitle', help='use title in file name', default=False)
    filesystem.add_option('-l', '--literal',
            action='store_true', dest='useliteral', help='use literal title in file name', default=False)
    filesystem.add_option('-A', '--auto-number',
            action='store_true', dest='autonumber',
            help='number downloaded files starting from 00000', default=False)
    filesystem.add_option('-o', '--output',
            dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
    filesystem.add_option('-a', '--batch-file',
            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('-w', '--no-overwrites',
            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
            action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
    filesystem.add_option('--no-continue',
            action='store_false', dest='continue_dl',
            help='do not resume partially downloaded files (restart from beginning)')
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option('--no-part',
            action='store_true', dest='nopart', help='do not use .part files', default=False)
    filesystem.add_option('--no-mtime',
            action='store_false', dest='updatetime',
            help='do not use the Last-modified header to set the file modification time', default=True)
    filesystem.add_option('--write-description',
            action='store_true', dest='writedescription',
            help='write video description to a .description file', default=False)
    filesystem.add_option('--write-info-json',
            action='store_true', dest='writeinfojson',
            help='write video metadata to a .info.json file', default=False)

    postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
            help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
            help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
            help='ffmpeg audio bitrate specification, 128k by default')
    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
            help='keeps the video file on disk after the post-processing; the video is erased by default')

    # The order of these calls is the order of the groups in the help text.
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
    parser.add_option_group(authentication)
    parser.add_option_group(postproc)

    xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_config_home:
        userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
    else:
        userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
    # System-wide file first, then the user's file, then the command line.
    argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
    opts, args = parser.parse_args(argv)

    return parser, opts, args
289
def gen_extractors():
    """Build one instance of each supported extractor and return them as a list.

    List order is significant: the first extractor that matches a URL is the
    one that handles it.
    """
    # These three instances are shared with the wrappers that take them as
    # a constructor argument below.
    yt = YoutubeIE()
    goog = GoogleIE()
    yahoo = YahooIE()

    extractors = [
        YoutubePlaylistIE(yt),
        YoutubeUserIE(yt),
        YoutubeSearchIE(yt),
        yt,
        MetacafeIE(yt),
        DailymotionIE(),
        goog,
        GoogleSearchIE(goog),
        PhotobucketIE(),
        yahoo,
        YahooSearchIE(yahoo),
        DepositFilesIE(),
        FacebookIE(),
        BlipTVIE(),
        VimeoIE(),
        MyVideoIE(),
        ComedyCentralIE(),
        EscapistIE(),
        CollegeHumorIE(),
        XVideosIE(),
        SoundcloudIE(),
        InfoQIE(),
        MixcloudIE(),
        StanfordOpenClassroomIE(),
        MTVIE(),
    ]

    # The generic extractor goes last, after every site-specific one.
    extractors.append(GenericIE())
    return extractors
326
def _real_main():
    """Run the program: parse options, configure networking, download.

    Always leaves through sys.exit() or parser.error(): with the downloader's
    return code after a download, 101 when the --max-downloads limit was
    reached, 0 after --dump-user-agent or --list-extractors, or an error
    message when options or files are invalid.  Exceptions raised by the
    download itself propagate to the caller.
    """
    parser, opts, args = parseOpts()

    # Open appropriate CookieJar
    if opts.cookiefile is None:
        jar = cookielib.CookieJar()
    else:
        try:
            jar = cookielib.MozillaCookieJar(opts.cookiefile)
            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                jar.load()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to open cookie file')

    # Dump user agent
    if opts.dump_user_agent:
        print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batchurls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = open(opts.batchfile, 'r')
            try:
                batchurls = batchfd.readlines()
            finally:
                # Release the handle we opened; stdin is not ours to close.
                if batchfd is not sys.stdin:
                    batchfd.close()
            batchurls = [x.strip() for x in batchurls]
            # Skip blank lines and lines starting with '#', '/' or ';'.
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = [url.strip() for url in batchurls + args]

    # General configuration
    cookie_processor = urllib2.HTTPCookieProcessor(jar)
    proxy_handler = urllib2.ProxyHandler()
    opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
    urllib2.install_opener(opener)
    socket.setdefaulttimeout(300)  # 5 minutes should be enough (famous last words)

    if opts.verbose:
        print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in extractors:
            print(ie.IE_NAME)
            # Each URL is listed under the first extractor that accepts it
            # and then removed so later extractors do not repeat it.
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                print(u'  ' + mu)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing')
    if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
        parser.error(u'using output template conflicts with using title, literal title or auto number')
    if opts.usetitle and opts.useliteral:
        parser.error(u'using title conflicts with using literal title')
    if opts.username is not None and opts.password is None:
        opts.password = getpass.getpass(u'Type account password and press return:')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = long(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    try:
        opts.playliststart = int(opts.playliststart)
        if opts.playliststart <= 0:
            raise ValueError(u'Playlist start must be positive')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        # -1 is the option's default and is exempt from the range check.
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
            parser.error(u'invalid audio format specified')

    # Any of the --get-* options implies both quiet mode and skipping the
    # actual download.
    print_only = (opts.geturl or opts.gettitle or opts.getthumbnail
            or opts.getdescription or opts.getfilename or opts.getformat)

    # File downloader
    fd = FileDownloader({
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'quiet': (opts.quiet or print_only),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or print_only),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        # First matching alternative wins: an explicit template, then the
        # defaults derived from the format/title/literal/auto-number flags.
        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
            or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
            or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
            or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or u'%(id)s.%(ext)s'),
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeinfojson': opts.writeinfojson,
        'writesubtitles': opts.writesubtitles,
        'subtitleslang': opts.subtitleslang,
        'matchtitle': opts.matchtitle,
        'rejecttitle': opts.rejecttitle,
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        })
    for extractor in extractors:
        fd.add_info_extractor(extractor)

    # PostProcessors
    if opts.extractaudio:
        fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))

    # Update version
    if opts.update_self:
        updateSelf(fd, sys.argv[0])

    # Maybe do nothing
    if len(all_urls) < 1:
        if not opts.update_self:
            parser.error(u'you must provide at least one URL')
        else:
            sys.exit()

    try:
        retcode = fd.download(all_urls)
    except MaxDownloadsReached:
        fd.to_screen(u'--max-download limit reached, aborting.')
        retcode = 101

    # Dump cookie jar if requested
    if opts.cookiefile is not None:
        try:
            jar.save()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to save cookie jar')

    sys.exit(retcode)
501
def main():
    """Run _real_main() and turn the known failures into an exit status.

    A DownloadError exits with status 1; SameFileError and KeyboardInterrupt
    exit with an explanatory error message.
    """
    try:
        _real_main()
        return
    except DownloadError:
        status = 1
    except SameFileError:
        status = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        status = u'\nERROR: Interrupted by user'
    sys.exit(status)