]> jfr.im git - yt-dlp.git/blame - youtube_dl/__init__.py
Merge branch 'Dineshs91-belgiannational-ie'
[yt-dlp.git] / youtube_dl / __init__.py
CommitLineData
235b3ba4
PH
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
3906e6ce 4__authors__ = (
59ae15a5
PH
5 'Ricardo Garcia Gonzalez',
6 'Danny Colligan',
7 'Benjamin Johnson',
8 'Vasyl\' Vavrychuk',
9 'Witold Baryluk',
10 'Paweł Paprota',
11 'Gergely Imreh',
12 'Rogério Brito',
13 'Philipp Hagemeister',
14 'Sören Schulze',
15 'Kevin Ngo',
16 'Ori Avtalion',
17 'shizeeg',
18 'Filippo Valsorda',
19 'Christian Albrecht',
88f6c78b 20 'Dave Vasilevsky',
2069acc6 21 'Jaime Marquínez Ferrándiz',
fffec3b9 22 'Jeff Crouse',
6aabe820 23 'Osama Khalid',
e8600d69 24 'Michael Walter',
95464f14 25 'M. Yasoob Ullah Khalid',
0ae456f0 26 'Julien Fraichard',
be74864a 27 'Johny Mo Swag',
df725153 28 'Axel Noack',
ba7a1de0 29 'Albert Kim',
4a55479f 30 'Pierre Rudloff',
085bea45 31 'Huarong Huo',
ac4f319b 32 'Ismael Mejía',
2dad310e 33 'Steffan \'Ruirize\' James',
a623df4c 34 'Andras Elso',
b5bdc269 35 'Jelle van der Waa',
d3793638 36 'Marcin Cieślak',
0a120f74 37 'Anton Larionov',
38b2db6a 38 'Takuya Tsuchida',
87968574 39 'Sergey M.',
b83be81d 40 'Michael Orlitzky',
e63fc1be 41 'Chris Gahan',
a7732b67 42 'Saimadhav Heblikar',
2a893862 43 'Mike Col',
6d784e87 44 'Oleg Prutz',
0cea52cc 45 'pulpe',
845d14d3 46 'Andreas Schmitz',
cbffec0c 47 'Michael Kaiser',
96d16370 48 'Niklas Laxström',
f38da667 49 'David Triendl',
a339d7ba 50 'Anthony Weems',
dc3727b6 51 'David Wagner',
2fcec131 52 'Juan C. Olivares',
855e2750 53 'Mattias Harrysson',
2cc0082d 54 'phaer',
610e47c8 55 'Sainyam Kapoor',
bbe99d26 56 'Nicolas Évrard',
ccdd34ed 57 'Jason Normore',
25dfe0eb 58 'Hoje Lee',
c2ef2923 59 'Adam Thalhammer',
d30d2815 60 'Georg Jähnig',
9cc977f1 61 'Ralf Haring',
ba4133c9 62 'Koki Takahashi',
3d55f280 63 'Ariset Llerena',
cdc22cb8 64 'Adam Malcontenti-Wilson',
1df0ae21 65 'Tobias Bell',
0d90e0f0 66 'Naglis Jonaitis',
3b09757b 67 'Charles Chen',
5e95cb27 68 'Hassaan Ali',
e48a2c64 69 'Dobrosław Żybort',
4665664c 70 'David Fabijan',
56ca04f6 71 'Sebastian Haas',
3f338cd6 72 'Alexander Kirk',
deda8ac3 73 'Erik Johnson',
00558d94 74 'Keith Beckman',
36b0079f 75 'Ole Ernst',
a7cacbca 76 'Aaron McDaniel (mcd1992)',
140d8d77 77 'Magnus Kolstad',
ae369738 78 'Hari Padmanaban',
997987d5 79 'Carlos Ramos',
5a13fe9e 80 '5moufl',
5e43e380 81 'lenaten',
2acfe95f 82 'Dennis Scheiba',
964ae0a1 83 'Damon Timm',
ec9c9784 84 'winwon',
fdfefa1b 85 'Xavier Beynon'
ba7a1de0 86)
235b3ba4
PH
87
88__license__ = 'Public Domain'
235b3ba4 89
0d94f247 90import codecs
8f563f32 91import io
235b3ba4 92import os
0f818663 93import random
235b3ba4 94import sys
235b3ba4 95
c496ca96 96
2daabe49
PH
97from .options import (
98 parseOpts,
99)
a4fd0415 100from .utils import (
e68301af 101 compat_getpass,
a4fd0415 102 compat_print,
a4fd0415 103 DateRange,
acd69589 104 DEFAULT_OUTTMPL,
a4fd0415 105 decodeOption,
a4fd0415 106 DownloadError,
a4fd0415 107 MaxDownloadsReached,
a4fd0415 108 preferredencoding,
62e609ab 109 read_batch_urls,
a4fd0415 110 SameFileError,
e3946f98 111 setproctitle,
a4fd0415
PH
112 std_headers,
113 write_string,
a4fd0415 114)
d5ed35b6 115from .update import update_self
92a86f4c 116from .downloader import (
a4fd0415
PH
117 FileDownloader,
118)
0824c28c 119from .extractor import gen_extractors
8222d8de 120from .YoutubeDL import YoutubeDL
56327689 121from .postprocessor import (
0c14e2fb 122 AtomicParsleyPP,
149254d0 123 FFmpegAudioFixPP,
a4fd0415
PH
124 FFmpegMetadataPP,
125 FFmpegVideoConvertor,
126 FFmpegExtractAudioPP,
127 FFmpegEmbedSubtitlePP,
e63fc1be 128 XAttrMetadataPP,
a2360a4c 129 ExecAfterDownloadPP,
a4fd0415
PH
130)
131
235b3ba4 132
def _real_main(argv=None):
    """Run the command-line program: parse options, validate them, download.

    The function parses ``argv`` with ``parseOpts``, applies header-related
    options to the shared ``std_headers`` dict, collects URLs from the batch
    file and the positional arguments, validates/normalises the numeric and
    format options, builds the option dict for ``YoutubeDL``, registers the
    requested postprocessors and finally starts the download.

    argv -- argument list handed to ``parseOpts`` (``None`` is passed through
            unchanged).

    The normal path ends in ``sys.exit(retcode)``; informational options
    (dumping the user agent, listing extractors) exit with status 0 earlier.
    Invalid options are reported through ``parser.error`` (presumably an
    optparse-style parser whose ``error`` does not return -- confirm against
    ``parseOpts``).  Raises ValueError for an invalid playlist range.
    """
    # Compatibility fixes for Windows
    if sys.platform == 'win32':
        # https://github.com/rg3/youtube-dl/issues/820
        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)

    setproctitle(u'youtube-dl')

    parser, opts, args = parseOpts(argv)

    # Set user agent
    if opts.user_agent is not None:
        std_headers['User-Agent'] = opts.user_agent

    # Set referer
    if opts.referer is not None:
        std_headers['Referer'] = opts.referer

    # Custom HTTP headers
    if opts.headers is not None:
        for h in opts.headers:
            if h.find(':', 1) < 0:
                parser.error(u'wrong header formatting, it should be key:value, not "%s"'%h)
            # Split on the first colon only: the value may itself contain
            # colons (URLs, timestamps, ...), and a larger maxsplit would
            # make the two-name unpacking below raise ValueError.
            key, value = h.split(':', 1)
            if opts.verbose:
                write_string(u'[debug] Adding header from command line option %s:%s\n'%(key, value))
            std_headers[key] = value

    # Dump user agent
    if opts.dump_user_agent:
        compat_print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batch_urls = []
    if opts.batchfile is not None:
        try:
            # '-' means: read the batch list from standard input
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
            batch_urls = read_batch_urls(batchfd)
            if opts.verbose:
                write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = batch_urls + args
    all_urls = [url.strip() for url in all_urls]
    _enc = preferredencoding()
    # Command-line arguments may be byte strings; normalise everything to text
    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            compat_print(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else ''))
            # Below each extractor, show which of the given URLs it would handle
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            for mu in matchedUrls:
                compat_print(u' ' + mu)
        sys.exit(0)
    if opts.list_extractor_descriptions:
        for ie in sorted(extractors, key=lambda ie: ie.IE_NAME.lower()):
            if not ie._WORKING:
                continue
            desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
            # An IE_DESC of False excludes the extractor from the listing
            if desc is False:
                continue
            if hasattr(ie, 'SEARCH_KEY'):
                _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise', u'sleeping bunny')
                _COUNTS = (u'', u'5', u'10', u'all')
                desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
            compat_print(desc)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing\n')
    if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
        parser.error(u'using output template conflicts with using title, video ID or auto number')
    if opts.usetitle and opts.useid:
        parser.error(u'using title conflicts with using video ID')
    if opts.username is not None and opts.password is None:
        opts.password = compat_getpass(u'Type account password and press [Return]: ')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.min_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
        if numeric_limit is None:
            parser.error(u'invalid min_filesize specified')
        opts.min_filesize = numeric_limit
    if opts.max_filesize is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
        if numeric_limit is None:
            parser.error(u'invalid max_filesize specified')
        opts.max_filesize = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = int(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    if opts.buffersize is not None:
        numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
        if numeric_buffersize is None:
            parser.error(u'invalid buffer size specified')
        opts.buffersize = numeric_buffersize
    if opts.playliststart <= 0:
        raise ValueError(u'Playlist start must be positive')
    if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
        raise ValueError(u'Playlist end must be greater than playlist start')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
            parser.error(u'invalid audio format specified')
    if opts.audioquality:
        # Accept a trailing/leading 'k' or 'K' (e.g. "128K"); the rest must be digits
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        if not opts.audioquality.isdigit():
            parser.error(u'invalid audio quality specified')
    if opts.recodevideo is not None:
        if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv']:
            parser.error(u'invalid video recode format specified')
    if opts.date is not None:
        date = DateRange.day(opts.date)
    else:
        date = DateRange(opts.dateafter, opts.datebefore)

    # Do not download videos when there are audio-only formats
    if opts.extractaudio and not opts.keepvideo and opts.format is None:
        opts.format = 'bestaudio/best'

    # --all-sub automatically sets --write-sub if --write-auto-sub is not given
    # this was the old behaviour if only --all-sub was given.
    if opts.allsubtitles and (opts.writeautomaticsub == False):
        opts.writesubtitles = True

    if sys.version_info < (3,):
        # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
        if opts.outtmpl is not None:
            opts.outtmpl = opts.outtmpl.decode(preferredencoding())
    # The first truthy alternative wins: an explicit template, then the
    # templates implied by the legacy naming flags, then the default.
    outtmpl =((opts.outtmpl is not None and opts.outtmpl)
            or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
            or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
            or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
            or (opts.useid and u'%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or DEFAULT_OUTTMPL)
    if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
        parser.error(u'Cannot download a video and extract audio into the same'
                     u' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
                     u' template'.format(outtmpl))

    # Any "print this field" option implies quiet mode and skipping the download
    any_printing = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
    download_archive_fn = os.path.expanduser(opts.download_archive) if opts.download_archive is not None else opts.download_archive

    ydl_opts = {
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        'twofactor': opts.twofactor,
        'videopassword': opts.videopassword,
        'quiet': (opts.quiet or any_printing),
        'no_warnings': opts.no_warnings,
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forceid': opts.getid,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forceduration': opts.getduration,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'forcejson': opts.dumpjson,
        'dump_single_json': opts.dump_single_json,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or any_printing),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'autonumber_size': opts.autonumber_size,
        'restrictfilenames': opts.restrictfilenames,
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'buffersize': opts.buffersize,
        'noresizebuffer': opts.noresizebuffer,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'progress_with_newline': opts.progress_with_newline,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'noplaylist': opts.noplaylist,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeannotations': opts.writeannotations,
        'writeinfojson': opts.writeinfojson,
        'writethumbnail': opts.writethumbnail,
        'writesubtitles': opts.writesubtitles,
        'writeautomaticsub': opts.writeautomaticsub,
        'allsubtitles': opts.allsubtitles,
        'listsubtitles': opts.listsubtitles,
        'subtitlesformat': opts.subtitlesformat,
        'subtitleslangs': opts.subtitleslangs,
        'matchtitle': decodeOption(opts.matchtitle),
        'rejecttitle': decodeOption(opts.rejecttitle),
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        'dump_intermediate_pages': opts.dump_intermediate_pages,
        'write_pages': opts.write_pages,
        'test': opts.test,
        'keepvideo': opts.keepvideo,
        'min_filesize': opts.min_filesize,
        'max_filesize': opts.max_filesize,
        'min_views': opts.min_views,
        'max_views': opts.max_views,
        'daterange': date,
        'cachedir': opts.cachedir,
        'youtube_print_sig_code': opts.youtube_print_sig_code,
        'age_limit': opts.age_limit,
        'download_archive': download_archive_fn,
        'cookiefile': opts.cookiefile,
        'nocheckcertificate': opts.no_check_certificate,
        'prefer_insecure': opts.prefer_insecure,
        'proxy': opts.proxy,
        'socket_timeout': opts.socket_timeout,
        'bidi_workaround': opts.bidi_workaround,
        'debug_printtraffic': opts.debug_printtraffic,
        'prefer_ffmpeg': opts.prefer_ffmpeg,
        'include_ads': opts.include_ads,
        'default_search': opts.default_search,
        'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
        'encoding': opts.encoding,
        'exec_cmd': opts.exec_cmd,
        'extract_flat': opts.extract_flat,
    }

    with YoutubeDL(ydl_opts) as ydl:
        ydl.print_debug_header()
        ydl.add_default_info_extractors()

        # PostProcessors
        # Add the metadata pp first, the other pps will copy it
        if opts.addmetadata:
            ydl.add_post_processor(FFmpegMetadataPP())
        if opts.extractaudio:
            ydl.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites))
        if opts.recodevideo:
            ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
        if opts.embedsubtitles:
            ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
        if opts.xattrs:
            ydl.add_post_processor(XAttrMetadataPP())
        if opts.embedthumbnail:
            if not opts.addmetadata:
                ydl.add_post_processor(FFmpegAudioFixPP())
            ydl.add_post_processor(AtomicParsleyPP())

        # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
        # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
        if opts.exec_cmd:
            ydl.add_post_processor(ExecAfterDownloadPP(
                verboseOutput=opts.verbose, exec_cmd=opts.exec_cmd))

        # Update version
        if opts.update_self:
            update_self(ydl.to_screen, opts.verbose)

        # Remove cache dir
        if opts.rm_cachedir:
            ydl.cache.remove()

        # Maybe do nothing
        if (len(all_urls) < 1) and (opts.load_info_filename is None):
            # Updating or clearing the cache are complete actions on their
            # own; only complain about missing URLs when neither was asked for.
            if not (opts.update_self or opts.rm_cachedir):
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()

        try:
            if opts.load_info_filename is not None:
                retcode = ydl.download_with_info_file(opts.load_info_filename)
            else:
                retcode = ydl.download(all_urls)
        except MaxDownloadsReached:
            ydl.to_screen(u'--max-download limit reached, aborting.')
            retcode = 101

    sys.exit(retcode)
235b3ba4 431
a27b9e8b 432
def main(argv=None):
    """Command-line entry point.

    Delegates to ``_real_main(argv)`` and converts the failures it lets
    through into a process exit status:

    * DownloadError     -> exit status 1
    * SameFileError     -> exit with an explanatory error message
    * KeyboardInterrupt -> exit with an "interrupted" error message

    Any other exception (including the SystemExit raised by ``_real_main``
    itself) propagates unchanged.
    """
    try:
        _real_main(argv)
    except DownloadError:
        exit_status = 1
    except SameFileError:
        exit_status = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        exit_status = u'\nERROR: Interrupted by user'
    else:
        # _real_main returned without raising: nothing to report
        return
    sys.exit(exit_status)