]> jfr.im git - yt-dlp.git/blob - youtube_dl/__init__.py
add youku support
[yt-dlp.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
# People who have contributed code to this program, in order of first contribution.
__authors__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    "Vasyl' Vavrychuk",
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
)

__license__ = 'Public Domain'
# Release identifier; updateSelf() compares it with the text served at UPDATE_URL_VERSION.
__version__ = '2012.02.27'

# Locations updateSelf() downloads from: the plain script, the file holding
# the latest version string, and the py2exe-built Windows executable.
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
UPDATE_URL_EXE = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl.exe'
27
28
29 import cookielib
30 import getpass
31 import optparse
32 import os
33 import re
34 import shlex
35 import socket
36 import subprocess
37 import sys
38 import urllib2
39 import warnings
40
41 from utils import *
42 from FileDownloader import *
43 from InfoExtractors import *
44 from PostProcessor import *
45
def updateSelf(downloader, filename):
    """Update the program file with the latest version from the repository.

    downloader -- only used for its to_screen() method (progress messages).
    filename   -- path of the program file to overwrite.

    Exits the process via sys.exit() with an 'ERROR: ...' message when the
    target is not writable, a download fails, or the new version cannot be
    written. Returns None without touching the file when the published
    version string equals __version__.
    """
    if not os.access(filename, os.W_OK):
        sys.exit('ERROR: no write permissions on %s' % filename)

    downloader.to_screen(u'Updating to latest version...')

    # Fetch the published version string. The response is closed on every
    # path (including the early "up-to-date" return below), and a network
    # failure is reported the same way as the later download failures.
    try:
        urlv = urllib2.urlopen(UPDATE_URL_VERSION)
        try:
            newversion = urlv.read().strip()
        finally:
            urlv.close()
    except (IOError, OSError):
        sys.exit('ERROR: unable to download latest version')

    if newversion == __version__:
        downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
        return

    if hasattr(sys, "frozen"):  # py2exe
        exe = os.path.abspath(filename)
        directory = os.path.dirname(exe)
        if not os.access(directory, os.W_OK):
            sys.exit('ERROR: no write permissions on %s' % directory)

        # Download next to the running executable; the batch script written
        # below moves the '.new' file over the old one.
        try:
            urlh = urllib2.urlopen(UPDATE_URL_EXE)
            try:
                newcontent = urlh.read()
            finally:
                urlh.close()
            with open(exe + '.new', 'wb') as outf:
                outf.write(newcontent)
        except (IOError, OSError):
            sys.exit('ERROR: unable to download latest version')

        try:
            bat = os.path.join(directory, 'youtube-dl-updater.bat')
            # The script waits a few seconds (ping as a sleep substitute),
            # replaces the executable, then deletes itself.
            script = (
                '\n'
                'echo Updating youtube-dl...\n'
                'ping 127.0.0.1 -n 5 -w 1000 > NUL\n'
                'move /Y "%s.new" "%s"\n'
                'del "%s"\n'
                '\n'
            ) % (exe, exe, bat)
            with open(bat, 'w') as b:
                b.write(script)

            os.startfile(bat)
        except (IOError, OSError):
            sys.exit('ERROR: unable to overwrite current version')

    else:
        try:
            urlh = urllib2.urlopen(UPDATE_URL)
            try:
                newcontent = urlh.read()
            finally:
                urlh.close()
        except (IOError, OSError):
            sys.exit('ERROR: unable to download latest version')

        try:
            with open(filename, 'wb') as outf:
                outf.write(newcontent)
        except (IOError, OSError):
            sys.exit('ERROR: unable to overwrite current version')

    downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
109
def parseOpts():
    """Build the command-line parser and parse configuration files plus sys.argv.

    Arguments are read, in this order, from /etc/youtube-dl.conf, the per-user
    youtube-dl.conf (under $XDG_CONFIG_HOME, or ~/.config when that variable
    is unset or empty) and finally sys.argv[1:], so later sources override
    earlier ones.

    Returns a (parser, opts, args) tuple: the optparse.OptionParser, the
    parsed option values, and the remaining positional arguments.
    """
    def _readOptions(filename_bytes):
        """Return the shell-split arguments found in a config file ([] if it cannot be opened)."""
        try:
            optionf = open(filename_bytes)
        except IOError:
            return []  # silently skip if file is not present
        with optionf:
            res = []
            for l in optionf:
                res += shlex.split(l, comments=True)
        return res

    def _format_option_string(option):
        """ ('-o', '--option') -> -o, --option METAVAR"""
        opts = []

        if option._short_opts:
            opts.append(option._short_opts[0])
        if option._long_opts:
            opts.append(option._long_opts[0])
        if len(opts) > 1:
            opts.insert(1, ', ')

        if option.takes_value():
            opts.append(' %s' % option.metavar)

        return "".join(opts)

    def _find_term_columns():
        """Return the terminal width in columns, or None when it cannot be determined."""
        columns = os.environ.get('COLUMNS', None)
        if columns:
            try:
                return int(columns)
            except ValueError:
                pass  # non-numeric COLUMNS: fall back to asking stty

        # Best effort only: stty may be missing (OSError) or print nothing
        # usable when stdin is not a terminal (IndexError / ValueError).
        try:
            sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = sp.communicate()
            return int(out.split()[1])
        except (OSError, ValueError, IndexError):
            return None

    max_width = 80
    max_help_position = 80

    # No need to wrap help messages if we're on a wide console
    columns = _find_term_columns()
    if columns:
        max_width = columns

    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
    fmt.format_option_strings = _format_option_string

    kw = {
        'version': __version__,
        'formatter': fmt,
        'usage': '%prog [options] url [url...]',
        # 'resolve' lets the explicit -h/--help and --version below replace
        # the ones optparse adds automatically.
        'conflict_handler': 'resolve',
    }

    parser = optparse.OptionParser(**kw)

    # option groups
    general = optparse.OptionGroup(parser, 'General Options')
    selection = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

    general.add_option('-h', '--help',
            action='help', help='print this help text and exit')
    # No short form here: -v belongs to --verbose (declared further down).
    # Declaring it on both only worked because the 'resolve' conflict handler
    # silently stripped it from this option.
    general.add_option('--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
            action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
            dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
            dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)

    selection.add_option('--playlist-start',
            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
    selection.add_option('--playlist-end',
            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',
            help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',
            help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads',
            help='Abort after downloading NUMBER files', default=None)

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
            dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-n', '--netrc',
            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)

    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT', help='video format code')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
            action='store_true', dest='prefer_free_formats', default=False,
            help='prefer free video formats unless a specific one is requested')
    video_format.add_option('--max-quality',
            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
    video_format.add_option('-F', '--list-formats',
            action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
    video_format.add_option('--write-srt',
            action='store_true', dest='writesubtitles',
            help='write video closed captions to a .srt file (currently youtube only)', default=False)
    video_format.add_option('--srt-lang',
            action='store', dest='subtitleslang', metavar='LANG',
            help="language of the closed captions to download (optional) use IETF language tags like 'en'")

    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate',
            help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
            action='store_true', dest='skip_download', help='do not download the video', default=False)
    verbosity.add_option('-g', '--get-url',
            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
    verbosity.add_option('-e', '--get-title',
            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
    verbosity.add_option('--get-thumbnail',
            action='store_true', dest='getthumbnail',
            help='simulate, quiet but print thumbnail URL', default=False)
    verbosity.add_option('--get-description',
            action='store_true', dest='getdescription',
            help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--get-filename',
            action='store_true', dest='getfilename',
            help='simulate, quiet but print output filename', default=False)
    verbosity.add_option('--get-format',
            action='store_true', dest='getformat',
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('--no-progress',
            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
    verbosity.add_option('--console-title',
            action='store_true', dest='consoletitle',
            help='display progress in console titlebar', default=False)
    verbosity.add_option('-v', '--verbose',
            action='store_true', dest='verbose', help='print various debugging information', default=False)

    filesystem.add_option('-t', '--title',
            action='store_true', dest='usetitle', help='use title in file name', default=False)
    filesystem.add_option('-l', '--literal',
            action='store_true', dest='useliteral', help='use literal title in file name', default=False)
    filesystem.add_option('-A', '--auto-number',
            action='store_true', dest='autonumber',
            help='number downloaded files starting from 00000', default=False)
    filesystem.add_option('-o', '--output',
            dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
    filesystem.add_option('-a', '--batch-file',
            dest='batchfile', metavar='FILE', help="file containing URLs to download ('-' for stdin)")
    filesystem.add_option('-w', '--no-overwrites',
            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
            action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
    filesystem.add_option('--no-continue',
            action='store_false', dest='continue_dl',
            help='do not resume partially downloaded files (restart from beginning)')
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option('--no-part',
            action='store_true', dest='nopart', help='do not use .part files', default=False)
    filesystem.add_option('--no-mtime',
            action='store_false', dest='updatetime',
            help='do not use the Last-modified header to set the file modification time', default=True)
    filesystem.add_option('--write-description',
            action='store_true', dest='writedescription',
            help='write video description to a .description file', default=False)
    filesystem.add_option('--write-info-json',
            action='store_true', dest='writeinfojson',
            help='write video metadata to a .info.json file', default=False)

    postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
            help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
            help='ffmpeg/avconv audio bitrate specification, 128k by default')
    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
            help='keeps the video file on disk after the post-processing; the video is erased by default')

    # The order of these calls is the order of the groups in --help output.
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
    parser.add_option_group(authentication)
    parser.add_option_group(postproc)

    xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_config_home:
        userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
    else:
        userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
    # Command-line arguments come last so they override both config files.
    argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
    opts, args = parser.parse_args(argv)

    return parser, opts, args
322
def gen_extractors():
    """Build one fresh instance of each supported extractor.

    The returned list is ordered by priority: the first extractor that
    matches a URL is the one that handles it, so the catch-all GenericIE
    has to stay at the very end.
    """
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVUserIE,
        BlipTVIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,
        YoukuIE,

        GenericIE,
    )
    return [ie_class() for ie_class in extractor_classes]
358
def _real_main():
    """Run the program: parse options, configure networking, download, exit.

    Always terminates through sys.exit() (or parser.error(), which exits as
    well): with the return value of FileDownloader.download(), with 101 when
    the --max-downloads limit stops the run, or with an 'ERROR: ...' message
    when the cookie file or batch file cannot be used.
    """
    parser, opts, args = parseOpts()

    # Open appropriate CookieJar
    if opts.cookiefile is None:
        jar = cookielib.CookieJar()
    else:
        try:
            jar = cookielib.MozillaCookieJar(opts.cookiefile)
            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                jar.load()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to open cookie file')

    # Dump user agent
    if opts.dump_user_agent:
        print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batchurls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = open(opts.batchfile, 'r')
            try:
                batchurls = batchfd.readlines()
            finally:
                # Close the file we opened ourselves; stdin is left alone.
                if batchfd is not sys.stdin:
                    batchfd.close()
            batchurls = [x.strip() for x in batchurls]
            # Drop blank lines and lines starting with '#', '/' or ';' (comments).
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = [url.strip() for url in batchurls + args]

    # General configuration
    cookie_processor = urllib2.HTTPCookieProcessor(jar)
    proxy_handler = urllib2.ProxyHandler()
    opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
    urllib2.install_opener(opener)
    socket.setdefaulttimeout(300)  # 5 minutes should be enough (famous last words)

    extractors = gen_extractors()

    if opts.list_extractors:
        # Each URL is listed under the first extractor that accepts it and is
        # then removed, so it is never shown twice.
        for ie in extractors:
            print(ie.IE_NAME)
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                print(u' ' + mu)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing')
    if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
        parser.error(u'using output template conflicts with using title, literal title or auto number')
    if opts.usetitle and opts.useliteral:
        parser.error(u'using title conflicts with using literal title')
    if opts.username is not None and opts.password is None:
        opts.password = getpass.getpass(u'Type account password and press return:')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = long(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    try:
        opts.playliststart = int(opts.playliststart)
        if opts.playliststart <= 0:
            raise ValueError(u'Playlist start must be positive')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        # -1 is the "until the last video" default and is always accepted.
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
            parser.error(u'invalid audio format specified')

    # Any of the --get-* options implies both quiet mode and skipping the download.
    print_only = (opts.geturl or opts.gettitle or opts.getthumbnail
                  or opts.getdescription or opts.getfilename or opts.getformat)

    # File downloader
    fd = FileDownloader({
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'quiet': (opts.quiet or print_only),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or print_only),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        # First matching rule wins: an explicit non-empty template, then the
        # defaults implied by the format/title/literal/auto-number options.
        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
            or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
            or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
            or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or u'%(id)s.%(ext)s'),
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        # Video data goes to stdout when the template is '-'.
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeinfojson': opts.writeinfojson,
        'writesubtitles': opts.writesubtitles,
        'subtitleslang': opts.subtitleslang,
        'matchtitle': opts.matchtitle,
        'rejecttitle': opts.rejecttitle,
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        })

    if opts.verbose:
        fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

    for extractor in extractors:
        fd.add_info_extractor(extractor)

    # PostProcessors
    if opts.extractaudio:
        fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))

    # Update version
    if opts.update_self:
        updateSelf(fd, sys.argv[0])

    # Maybe do nothing
    if len(all_urls) < 1:
        if not opts.update_self:
            parser.error(u'you must provide at least one URL')
        else:
            sys.exit()

    try:
        retcode = fd.download(all_urls)
    except MaxDownloadsReached:
        # The message names the option exactly as the parser declares it.
        fd.to_screen(u'--max-downloads limit reached, aborting.')
        retcode = 101

    # Dump cookie jar if requested
    if opts.cookiefile is not None:
        try:
            jar.save()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to save cookie jar')

    sys.exit(retcode)
534
def main():
    """Entry point: run _real_main() and map known failures to an exit status.

    A DownloadError exits with status 1; SameFileError and Ctrl-C exit with
    an explanatory error message instead.
    """
    try:
        _real_main()
    except KeyboardInterrupt:
        sys.exit(u'\nERROR: Interrupted by user')
    except DownloadError:
        sys.exit(1)
    except SameFileError:
        sys.exit(u'ERROR: fixed output name but more than one file to download')