jfr.im git - yt-dlp.git/blob - youtube_dl/__init__.py
merged unescapeHTML branch; removed lxml dependency
[yt-dlp.git] / youtube_dl / __init__.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
# Authors credited for this program.
__authors__ = (
    'Ricardo Garcia Gonzalez',
    'Danny Colligan',
    'Benjamin Johnson',
    "Vasyl' Vavrychuk",
    'Witold Baryluk',
    'Paweł Paprota',
    'Gergely Imreh',
    'Rogério Brito',
    'Philipp Hagemeister',
    'Sören Schulze',
    'Kevin Ngo',
    'Ori Avtalion',
    'shizeeg',
    'Filippo Valsorda',
)

__license__ = 'Public Domain'
__version__ = '2012.02.27'

# Locations used by the self-update code; all of them live under the same
# directory of the upstream repository.
_UPDATE_BASE = 'https://raw.github.com/rg3/youtube-dl/master/'
UPDATE_URL = _UPDATE_BASE + 'youtube-dl'
UPDATE_URL_VERSION = _UPDATE_BASE + 'LATEST_VERSION'
UPDATE_URL_EXE = _UPDATE_BASE + 'youtube-dl.exe'
27
28
29 import cookielib
30 import getpass
31 import optparse
32 import os
33 import re
34 import shlex
35 import socket
36 import subprocess
37 import sys
38 import urllib2
39 import warnings
40
41 from utils import *
42 from FileDownloader import *
43 from InfoExtractors import *
44 from PostProcessor import *
45
def updateSelf(downloader, filename):
    """Update the program file with the latest version from the repository.

    downloader -- only used for its to_screen() method
    filename   -- path of the program file that is to be replaced

    Returns None, both after a successful update and when the installed
    version already matches the published one.  Calls sys.exit() with an
    error message when the target is not writable, when the new version
    cannot be downloaded or when the current file cannot be overwritten.
    """
    if not os.access(filename, os.W_OK):
        sys.exit('ERROR: no write permissions on %s' % filename)

    downloader.to_screen(u'Updating to latest version...')

    # Read the published version and release the handle right away, so it is
    # also closed on the "already up-to-date" early return below.
    urlv = urllib2.urlopen(UPDATE_URL_VERSION)
    try:
        newversion = urlv.read().strip()
    finally:
        urlv.close()
    if newversion == __version__:
        downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
        return

    if hasattr(sys, "frozen"):  # py2exe
        # Only urllib2 is imported at the top of this file, while
        # urlretrieve() lives in urllib; import it where it is needed.
        import urllib

        exe = os.path.abspath(filename)
        # Take the directory from the absolute path: for a bare file name
        # os.path.dirname() returns '' and the access check would always fail.
        directory = os.path.dirname(exe)
        if not os.access(directory, os.W_OK):
            sys.exit('ERROR: no write permissions on %s' % directory)

        try:
            urllib.urlretrieve(UPDATE_URL_EXE, exe + '.new')
        except (IOError, OSError):
            sys.exit('ERROR: unable to download latest version')

        # A running executable cannot replace itself, so a helper batch file
        # waits a few seconds, moves the new file into place and then
        # deletes itself.
        try:
            bat = os.path.join(directory, 'youtube-dl-updater.bat')
            b = open(bat, 'w')
            try:
                b.write("""
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s"
del "%s"
""" % (exe, exe, bat) + '\n')
            finally:
                b.close()

            os.startfile(bat)
        except (IOError, OSError):
            sys.exit('ERROR: unable to overwrite current version')

    else:
        try:
            urlh = urllib2.urlopen(UPDATE_URL)
            try:
                newcontent = urlh.read()
            finally:
                urlh.close()
        except (IOError, OSError):
            sys.exit('ERROR: unable to download latest version')

        try:
            outf = open(filename, 'wb')
            try:
                outf.write(newcontent)
            finally:
                outf.close()
        except (IOError, OSError):
            sys.exit('ERROR: unable to overwrite current version')

    downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
107
def parseOpts():
    """Build the option parser and parse the configuration files and sys.argv.

    The argument list handed to the parser is the concatenation of the
    options read from /etc/youtube-dl.conf, the options read from the
    per-user youtube-dl.conf (under $XDG_CONFIG_HOME, or ~/.config when that
    variable is unset) and sys.argv[1:], in that order.

    Returns a (parser, opts, args) tuple.
    """
    def _readOptions(filename_bytes):
        """Return the shell-split options found in a file ([] if unreadable)."""
        try:
            optionf = open(filename_bytes)
        except IOError:
            return []  # silently skip if file is not present
        try:
            res = []
            for l in optionf:
                res += shlex.split(l, comments=True)
        finally:
            optionf.close()
        return res

    def _format_option_string(option):
        ''' ('-o', '--option') -> -o, --format METAVAR'''

        opts = []

        if option._short_opts:
            opts.append(option._short_opts[0])
        if option._long_opts:
            opts.append(option._long_opts[0])
        if len(opts) > 1:
            opts.insert(1, ', ')

        if option.takes_value():
            opts.append(' %s' % option.metavar)

        return "".join(opts)

    def _find_term_columns():
        """Return the terminal width in columns, or None if it is unknown."""
        columns = os.environ.get('COLUMNS', None)
        if columns:
            try:
                return int(columns)
            except ValueError:
                pass  # COLUMNS is not a number; ask stty instead

        # Best effort only: stty may be missing (OSError) or print nothing
        # usable when there is no terminal (IndexError / ValueError).  Other
        # exceptions, notably KeyboardInterrupt, are no longer swallowed.
        try:
            sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out = sp.communicate()[0]
            return int(out.split()[1])
        except (OSError, ValueError, IndexError):
            return None

    max_width = 80
    max_help_position = 80

    # No need to wrap help messages if we're on a wide console
    columns = _find_term_columns()
    if columns:
        max_width = columns

    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
    fmt.format_option_strings = _format_option_string

    kw = {
        'version': __version__,
        'formatter': fmt,
        'usage': '%prog [options] url [url...]',
        'conflict_handler': 'resolve',
    }

    parser = optparse.OptionParser(**kw)

    # option groups
    general = optparse.OptionGroup(parser, 'General Options')
    selection = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format = optparse.OptionGroup(parser, 'Video Format Options')
    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

    general.add_option('-h', '--help',
            action='help', help='print this help text and exit')
    # NOTE: '-v' is declared again for --verbose further down.  With the
    # 'resolve' conflict handler the later declaration takes the short
    # option, leaving this one reachable as --version only.
    general.add_option('-v', '--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
            action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
            dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
            dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)

    selection.add_option('--playlist-start',
            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
    selection.add_option('--playlist-end',
            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
            dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-n', '--netrc',
            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)


    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT', help='video format code')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
    video_format.add_option('--max-quality',
            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
    video_format.add_option('-F', '--list-formats',
            action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
    video_format.add_option('--write-srt',
            action='store_true', dest='writesubtitles',
            help='write video closed captions to a .srt file (currently youtube only)', default=False)
    video_format.add_option('--srt-lang',
            action='store', dest='subtitleslang', metavar='LANG',
            help='language of the closed captions to download (optional) use IETF language tags like \'en\'')


    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
            action='store_true', dest='skip_download', help='do not download the video', default=False)
    verbosity.add_option('-g', '--get-url',
            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
    verbosity.add_option('-e', '--get-title',
            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
    verbosity.add_option('--get-thumbnail',
            action='store_true', dest='getthumbnail',
            help='simulate, quiet but print thumbnail URL', default=False)
    verbosity.add_option('--get-description',
            action='store_true', dest='getdescription',
            help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--get-filename',
            action='store_true', dest='getfilename',
            help='simulate, quiet but print output filename', default=False)
    verbosity.add_option('--get-format',
            action='store_true', dest='getformat',
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('--no-progress',
            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
    verbosity.add_option('--console-title',
            action='store_true', dest='consoletitle',
            help='display progress in console titlebar', default=False)
    verbosity.add_option('-v', '--verbose',
            action='store_true', dest='verbose', help='print various debugging information', default=False)


    filesystem.add_option('-t', '--title',
            action='store_true', dest='usetitle', help='use title in file name', default=False)
    filesystem.add_option('-l', '--literal',
            action='store_true', dest='useliteral', help='use literal title in file name', default=False)
    filesystem.add_option('-A', '--auto-number',
            action='store_true', dest='autonumber',
            help='number downloaded files starting from 00000', default=False)
    filesystem.add_option('-o', '--output',
            dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
    filesystem.add_option('-a', '--batch-file',
            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('-w', '--no-overwrites',
            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
            action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
    filesystem.add_option('--no-continue',
            action='store_false', dest='continue_dl',
            help='do not resume partially downloaded files (restart from beginning)')
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option('--no-part',
            action='store_true', dest='nopart', help='do not use .part files', default=False)
    filesystem.add_option('--no-mtime',
            action='store_false', dest='updatetime',
            help='do not use the Last-modified header to set the file modification time', default=True)
    filesystem.add_option('--write-description',
            action='store_true', dest='writedescription',
            help='write video description to a .description file', default=False)
    filesystem.add_option('--write-info-json',
            action='store_true', dest='writeinfojson',
            help='write video metadata to a .info.json file', default=False)


    postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
            help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
            help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
            help='ffmpeg audio bitrate specification, 128k by default')
    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
            help='keeps the video file on disk after the post-processing; the video is erased by default')


    # The order below is the order in which the groups appear in --help
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
    parser.add_option_group(authentication)
    parser.add_option_group(postproc)

    xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_config_home:
        userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
    else:
        userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
    # System-wide file first, then the user's file, then the command line
    argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
    opts, args = parser.parse_args(argv)

    return parser, opts, args
320
def gen_extractors():
    """Instantiate every supported extractor and return them as a list.

    The position in the list is significant: the first extractor that
    matches a URL is the one that handles it, which is why the catch-all
    GenericIE comes last.
    """
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,

        GenericIE,
    )
    return [extractor_class() for extractor_class in extractor_classes]
354
def _real_main():
    """Parse the options, configure networking and run the downloads.

    Steps, in order: open the cookie jar, handle --dump-user-agent, collect
    URLs from the batch file and the command line, install the urllib2
    opener, handle --list-extractors, validate the options, build the
    FileDownloader, optionally self-update, download, save the cookie jar.

    Does not return normally: it finishes with sys.exit(retcode), and exits
    earlier through sys.exit() or parser.error() on errors and on the
    informational options.
    """
    parser, opts, args = parseOpts()

    # Open appropriate CookieJar
    if opts.cookiefile is None:
        jar = cookielib.CookieJar()
    else:
        try:
            jar = cookielib.MozillaCookieJar(opts.cookiefile)
            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                jar.load()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to open cookie file')

    # Dump user agent
    if opts.dump_user_agent:
        print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batchurls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = open(opts.batchfile, 'r')
            try:
                batchurls = batchfd.readlines()
            finally:
                # Close the file we opened ourselves; stdin is left alone
                if batchfd is not sys.stdin:
                    batchfd.close()
            batchurls = [x.strip() for x in batchurls]
            # Drop empty lines and lines starting with '#', '/' or ';'
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = batchurls + args
    all_urls = [url.strip() for url in all_urls]

    # General configuration
    cookie_processor = urllib2.HTTPCookieProcessor(jar)
    proxy_handler = urllib2.ProxyHandler()
    opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
    urllib2.install_opener(opener)
    socket.setdefaulttimeout(300)  # 5 minutes should be enough (famous last words)

    if opts.verbose:
        print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

    extractors = gen_extractors()

    if opts.list_extractors:
        # Each URL is listed under the first extractor that accepts it and
        # is then removed, so that it is not shown again further down.
        for ie in extractors:
            print(ie.IE_NAME)
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                print(u' ' + mu)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing')
    if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
        parser.error(u'using output template conflicts with using title, literal title or auto number')
    if opts.usetitle and opts.useliteral:
        parser.error(u'using title conflicts with using literal title')
    if opts.username is not None and opts.password is None:
        opts.password = getpass.getpass(u'Type account password and press return:')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = long(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    try:
        opts.playliststart = int(opts.playliststart)
        if opts.playliststart <= 0:
            raise ValueError(u'Playlist start must be positive')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        # -1 is the option's default and stands for "last"
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
            parser.error(u'invalid audio format specified')

    # File downloader
    fd = FileDownloader({
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        # Every --get-* option implies quiet mode and skips the download
        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        # The first alternative that evaluates to a non-empty template wins:
        # an explicit -o template, otherwise one derived from the naming flags
        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
            or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
            or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
            or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or u'%(id)s.%(ext)s'),
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeinfojson': opts.writeinfojson,
        'writesubtitles': opts.writesubtitles,
        'subtitleslang': opts.subtitleslang,
        'matchtitle': opts.matchtitle,
        'rejecttitle': opts.rejecttitle,
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        })
    for extractor in extractors:
        fd.add_info_extractor(extractor)

    # PostProcessors
    if opts.extractaudio:
        fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))

    # Update version
    if opts.update_self:
        updateSelf(fd, sys.argv[0])

    # Maybe do nothing
    if len(all_urls) < 1:
        if not opts.update_self:
            parser.error(u'you must provide at least one URL')
        else:
            sys.exit()

    try:
        retcode = fd.download(all_urls)
    except MaxDownloadsReached:
        # The option is spelled --max-downloads
        fd.to_screen(u'--max-downloads limit reached, aborting.')
        retcode = 101

    # Dump cookie jar if requested
    if opts.cookiefile is not None:
        try:
            jar.save()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to save cookie jar')

    sys.exit(retcode)
529
def main():
    """Entry point: run _real_main() and map known failures to an exit.

    A DownloadError exits with status 1; a SameFileError or a
    KeyboardInterrupt exits with the corresponding error message.
    """
    exit_arg = None
    try:
        _real_main()
    except DownloadError:
        exit_arg = 1
    except SameFileError:
        exit_arg = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        exit_arg = u'\nERROR: Interrupted by user'

    if exit_arg is not None:
        sys.exit(exit_arg)