]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[extractor/common] Extract submit inputs
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
912b38b4 6import calendar
676eb3f2 7import codecs
62e609ab 8import contextlib
e3946f98 9import ctypes
c496ca96
PH
10import datetime
11import email.utils
f45c185f 12import errno
be4a824d 13import functools
d77c3dfd 14import gzip
b7ab0590 15import itertools
03f9daab 16import io
f4bfd65f 17import json
d77c3dfd 18import locale
02dbf93f 19import math
347de493 20import operator
d77c3dfd 21import os
4eb7f1d1 22import pipes
c496ca96 23import platform
d77c3dfd 24import re
13ebea79 25import ssl
c496ca96 26import socket
b53466e1 27import struct
1c088fa8 28import subprocess
d77c3dfd 29import sys
181c8655 30import tempfile
01951dda 31import traceback
bcf89ce6 32import xml.etree.ElementTree
d77c3dfd 33import zlib
d77c3dfd 34
8c25f81b 35from .compat import (
8f9312c3 36 compat_basestring,
8c25f81b 37 compat_chr,
8c25f81b 38 compat_html_entities,
be4a824d 39 compat_http_client,
c86b6142 40 compat_kwargs,
8c25f81b 41 compat_parse_qs,
be4a824d 42 compat_socket_create_connection,
8c25f81b
PH
43 compat_str,
44 compat_urllib_error,
45 compat_urllib_parse,
46 compat_urllib_parse_urlparse,
47 compat_urllib_request,
48 compat_urlparse,
7d4111ed 49 shlex_quote,
8c25f81b 50)
4644ac55
S
51
52
468e2e92
FV
# The re module does not expose the type of a compiled pattern under a
# public name, so derive it from an actual compiled pattern
compiled_regex_type = type(re.compile(''))

# Headers that YoutubeDLHandler.http_request adds to a request when the
# request does not already carry them
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Sentinel meaning "no default was supplied"; lets None be a valid default
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]
70
71
def preferredencoding():
    """Return the encoding to use for text on this system.

    The name reported by locale.getpreferredencoding() is returned when it
    designates a codec that can actually encode text; in every other case
    'UTF-8' is returned.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
d77c3dfd 85
f4bfd65f 86
def write_json_file(obj, fn):
    """ Serialize obj as JSON into the file fn, atomically if possible

    The JSON is first written to a temporary file created next to fn, which
    is then renamed over fn. If anything fails, the temporary file is
    removed and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile fails on non-ASCII names unless it is given
        # unicode objects
        def path_basename(f):
            return os.path.basename(f).decode(fs_encoding)

        def path_dirname(f):
            return os.path.dirname(f).decode(fs_encoding)
    else:
        path_basename, path_dirname = os.path.basename, os.path.dirname

    tmp_args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # json.dump wants a byte stream on Python 2.x and a character stream
    # on Python 3.x
    if sys.version_info < (3, 0):
        tmp_args['mode'] = 'wb'
    else:
        tmp_args['mode'] = 'w'
        tmp_args['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # os.rename refuses to overwrite on Windows (WindowsError or
            # FileExistsError), so drop the destination first
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file, then propagate
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
139
140
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the first element matching xpath[@key] or xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val:
            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the first element matching xpath[@key] or xpath[@key=val] """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        # No attribute predicates here, so filter the candidates by hand
        for candidate in node.findall(xpath):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
162
d7e66d39
JMF
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path to '{uri}tag' using ns_map.

    Steps without a colon are kept unchanged. A prefix missing from ns_map
    raises KeyError.
    """
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
177
d77c3dfd 178
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node matching xpath.

    When nothing matches: return default if one was given, otherwise raise
    ExtractorError if fatal is set, otherwise return None. name replaces
    xpath in the error message.
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    found = node.find(xpath)
    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = xpath if name is None else name
        raise ExtractorError('Could not find XML element %s' % label)
    return None
193
194
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element under node matching xpath.

    A missing element or missing text yields default if one was given,
    raises ExtractorError if fatal is set, and yields None otherwise.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # xpath_element already resolved the "not found" case for us
    if element is None or element == default:
        return element
    text = element.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % label)
    return None
a41fb80c
S
208
209
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath[@key].

    When no such element exists: return default if one was given, raise
    ExtractorError if fatal is set, and return None otherwise.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % label)
    return None
bf0ff932
PH
221
222
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id, or None"""
    return get_element_by_attribute('id', id, html)
226
12ea2f30 227
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the unescaped content of the first tag in html carrying the
    given attribute with the given value, or None if there is none"""

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    mobj = re.search(pattern, html)
    if mobj is None:
        return None

    content = mobj.group('content')
    # Drop one surrounding character on each side when the content starts
    # with a quote
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 249
9e6dd238
FV
250
def clean_html(html):
    """Turn an HTML snippet into a readable plain-text string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Source newlines carry no meaning; <br> and paragraph breaks do
    text = html.replace('\n', ' ')
    text = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop whatever tags remain
    text = re.sub('<.*?>', '', text)
    # Decode the HTML entities
    text = unescapeHTML(text)
    return text.strip()
9e6dd238
FV
266
267
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' designates standard output (its binary buffer when available). If
    opening fails for a reason other than EACCES, the name is passed
    through sanitize_path() and, when that changes it, opened again; an
    error from this second attempt propagates to the caller.

    Returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem is not fixed by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
298
299
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a UNIX timestamp (None if unparsable)"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 307
5f6a1245 308
def sanitize_filename(s, restricted=False, is_id=False):
    """Make a string usable as part of a filename.

    With restricted set, a stricter subset of characters is allowed.
    Set is_id when s is not an arbitrary string but an ID that should be
    kept as intact as possible: the cosmetic cleanup of the result is then
    skipped.
    """
    def replace_insane(char):
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Timestamps such as 12:34:56 keep their digits, colons become underscores
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores, then trim them from both ends
    while '__' in result:
        result = result.replace('__', '_')
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result if result else '_'
d77c3dfd 345
5f6a1245 346
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform s is returned unchanged. On Windows the path is
    normalized and, in every component other than '.' and '..', each
    character forbidden in Windows file names and a trailing dot are
    replaced with '#'. A leading drive or UNC prefix is preserved as is.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        # splitdrive does not recognize UNC prefixes on Python 2.6
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component left in front of the root separator
        norm_path.pop(0)
    # Raw string: the previous non-raw literal relied on the invalid escape
    # sequence '\.', which newer Python versions warn about
    forbidden = r'(?:[/<>:"\|\\?\*]|\.$)'
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(forbidden, '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
363
364
def orderedSet(iterable):
    """ Return a list of the items of iterable without duplicates, in order of first appearance """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 372
912b38b4 373
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities and numeric
    ones (decimal '#123' or hexadecimal '#x7b') are decoded. Anything
    else, including a numeric entity whose value is not a valid code
    point, is returned as its literal '&...;' representation.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() accepts the '0x' prefix in base 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # Pages occasionally contain bogus entities such as &#2013266066;
        # do not let an out-of-range value abort the whole unescaping
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
4e408e47
PH
392
393
def unescapeHTML(s):
    """Replace the HTML entities in s by the characters they stand for.

    None is passed through unchanged.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(mobj):
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^;]+);', decode_entity, s)
d77c3dfd 401
8bf48f23 402
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and file names"""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
413
414
def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form expected by the file system or subprocess APIs

    @param s The name of the file
    @param for_subprocess Whether the result is passed to a subprocess
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Characters the target encoding cannot represent are dropped
    encoding = get_subprocess_encoding()
    return s.encode(encoding, 'ignore')
433
434
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename: decode a byte string file name on Python 2.

    On Python 3, and for anything that is not a byte string, b is returned
    unchanged.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    # Bytes that are invalid in the encoding are dropped
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 444
f07b74fc
PH
445
def encodeArgument(s):
    """Encode s for use as a subprocess argument"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
453
454
aa49acd1
S
def decodeArgument(b):
    """Decode a subprocess argument; see decodeFilename"""
    return decodeFilename(b, for_subprocess=True)
457
458
8271226a
PH
def decodeOption(optval):
    """Return an option value as text, decoding byte strings with the
    preferred encoding; None is passed through"""
    if optval is None:
        return None
    text = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(text, compat_str)
    return text
1c256f70 467
5f6a1245 468
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'S', 'M:SS' or 'H:MM:SS'.

    The shortest form able to hold the value is used, so exactly one
    minute is '1:00' and exactly one hour is '1:00:00'.
    """
    # >= rather than >: the boundary values would otherwise be rendered
    # as '60' and '60:00'
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
476
a0ddb8a2 477
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version.

    Certificate verification is disabled when params['nocheckcertificate']
    is true. Extra keyword arguments are passed on to the handler.
    """
    no_check = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        default_ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if no_check:
            default_ctx.check_hostname = False
            default_ctx.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=default_ctx, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    legacy_ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if no_check:
        legacy_ctx.verify_mode = ssl.CERT_NONE
    else:
        legacy_ctx.verify_mode = ssl.CERT_REQUIRED
    legacy_ctx.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=legacy_ctx, **kwargs)
ea6d901e 501
732ea2f0 502
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to error messages that ask for a bug report"""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return ''.join([
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ])
512
513
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is mentioned in the message; video_id, if given, is prepended to it.
        """

        # Errors raised while handling a network or availability problem
        # are always considered expected
        expected_types = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_types:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors ask the user to file a bug report
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None"""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
01951dda 541
1c256f70 542
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected error for a URL that cannot be handled; the URL is kept in .url"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
548
549
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Raised when a regular expression did not match"""
553
554
class DownloadError(Exception):
    """Error raised when a download fails.

    FileDownloader objects may throw this exception if they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
567
568
class SameFileError(Exception):
    """Raised when several downloads target the same file.

    FileDownloader objects throw this exception if they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
576
577
class PostProcessingError(Exception):
    """Error raised when post-processing fails.

    A PostProcessor's .run() method may raise this exception to indicate
    an error in the postprocessing task. The message is kept in .msg.
    """

    def __init__(self, msg):
        self.msg = msg
d77c3dfd 587
5f6a1245 588
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
592
593
class UnavailableVideoError(Exception):
    """Raised when a requested format is unavailable.

    This exception is thrown when a video is requested in a format that
    is not available for that video.
    """
d77c3dfd
FV
601
602
class ContentTooShortError(Exception):
    """Raised when less data arrived than announced.

    FileDownloader objects may raise this exception when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both sizes are in bytes
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 615
5f6a1245 616
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class(*args, **kwargs), bound to the configured
    source address.

    The source address is read from ydl_handler._params['source_address'];
    when it is unset the connection is returned as created.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/rg3/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    conn = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')
    if source_address is None:
        return conn

    bind_to = (source_address, 0)
    if hasattr(conn, 'source_address'):  # Python 2.7+
        conn.source_address = bind_to
        return conn

    # Python 2.6: no source_address support, so replace connect() by a
    # version that binds the socket itself
    def _hc_connect(self, *args, **kwargs):
        sock = compat_socket_create_connection(
            (self.host, self.port), self.timeout, bind_to)
        if is_https:
            self.sock = ssl.wrap_socket(
                sock, self.key_file, self.cert_file,
                ssl_version=ssl.PROTOCOL_TLSv1)
        else:
            self.sock = sock
    conn.connect = functools.partial(_hc_connect, conn)

    return conn
642
643
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    Once installed in an OpenerDirector, this class adds the standard
    headers to every HTTP request and transparently decodes gzipped and
    deflated responses from web servers. To avoid compression for a
    particular request, the calling code only has to include the HTTP
    header "Youtubedl-No-Compression" in it; the header is removed before
    the real request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_factory = functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False)
        return self.do_open(conn_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress data as a raw deflate stream, falling back to zlib framing"""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, setting the status code by hand
        where the constructor does not take it"""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        response = addinfourl(stream, headers, url)
        response.code = code
        return response

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)

        # Substitute URL if any change after escaping
        if escaped_url != original_url:
            if req.get_method() == 'HEAD':
                request_class = HEADRequest
            else:
                request_class = compat_urllib_request.Request
            replacement = request_class(
                escaped_url, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
            replacement.timeout = req.timeout
            req = replacement

        for header, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if header.capitalize() not in req.headers:
                req.add_header(header, value)
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1 to 1023 trailing bytes cut off
                for trim in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets the very same treatment
    https_request = http_request
    https_response = http_response
bf50b038 767
5de90176 768
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that creates its connections through
    _create_http_connection, using https_conn_class (HTTPSConnection by
    default)"""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Hand over whichever TLS settings the base handler stored
        open_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            open_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            open_kwargs['check_hostname'] = self._check_hostname
        conn_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(conn_factory, req, **open_kwargs)
be4a824d
PH
784
785
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that percent-encodes non-ASCII Set-Cookie headers
    on Python 2 before they are processed"""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        if sys.version_info < (3, 0) and response.headers:
            for header_name in ('Set-Cookie', 'Set-Cookie2'):
                raw_value = response.headers.get(header_name)
                if not raw_value:
                    continue
                attributes = raw_value.decode('iso-8859-1').split(';')
                escaped_value = '; '.join([
                    escape_rfc3986(attribute.strip())
                    for attribute in attributes]).encode('iso-8859-1')
                if raw_value != escaped_value:
                    del response.headers[header_name]
                    response.headers[header_name] = escaped_value
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
810
811
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    delimiter separates the date from the time. timezone, if given, is a
    datetime.timedelta UTC offset; otherwise a trailing 'Z' or '+hh:mm'
    style offset (with optional fractional seconds before it) is parsed
    and removed from date_str. None is passed through.
    """

    if date_str is None:
        return None

    if timezone is None:
        # UTC unless an explicit offset is found below
        timezone = datetime.timedelta()
        tz_match = re.search(
            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if tz_match:
            date_str = date_str[:-len(tz_match.group(0))]
            if tz_match.group('sign'):
                direction = 1 if tz_match.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=direction * int(tz_match.group('hours')),
                    minutes=direction * int(tz_match.group('minutes')))
    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    utc_time = datetime.datetime.strptime(date_str, date_format) - timezone
    return calendar.timegm(utc_time.timetuple())
836
837
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    day_first selects how ambiguous numeric dates such as 8/7/2009 are
    read (day/month when true, month/day otherwise). None is returned for
    None and for strings that cannot be parsed.
    """

    if date_str is None:
        return None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    common_formats = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    if day_first:
        ambiguous_formats = [
            '%d-%m-%Y',
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ]
    else:
        ambiguous_formats = [
            '%m-%d-%Y',
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ]

    upload_date = None
    # Every format is tried; the last one that parses determines the result
    for expression in common_formats + ambiguous_formats:
        try:
            parsed = datetime.datetime.strptime(date_str, expression)
        except ValueError:
            continue
        upload_date = parsed.strftime('%Y%m%d')
    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
901
5f6a1245 902
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from a URL

    The candidate is whatever follows the last dot of the part before the
    first '?'. default_ext is returned when url is None or the candidate is
    not purely alphanumeric.
    """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 911
5f6a1245 912
def subtitles_filename(filename, sub_lang, sub_format):
    """Build the subtitle file name: filename without its last extension,
    followed by the language code and the subtitle format"""
    stem = filename.rsplit('.', 1)[0]
    return stem + '.' + sub_lang + '.' + sub_format
d4051a8e 915
5f6a1245 916
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    'now', 'today' and 'yesterday' are accepted on their own as well.
    Raises ValueError if date_str matches none of these forms."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: '\d' in a normal string literal is an invalid escape sequence
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        amount = int(match.group('time'))
        if sign == '-':
            amount = -amount
        unit = match.group('unit')
        # timedelta knows neither months nor years, so approximate them
        # with 30 and 365 days respectively
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: amount})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
5f6a1245
JW
944
945
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    any other string is returned unchanged"""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
954
5f6a1245 955
bd558525
JMF
class DateRange(object):
    """An inclusive interval between two calendar dates"""

    def __init__(self, start=None, end=None):
        """start and end are strings understood by date_from_str;
        a bound left as None is open (earliest / latest representable date)"""
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Build the range that covers exactly the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Tell whether date (a datetime.date or a date string) lies in the range"""
        when = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= when <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
c496ca96
PH
985
986
def platform_name():
    """ Returns the platform name (as reported by platform.platform())
    as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
995
996
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s is the text to write, out the stream it is destined for. The text is
    only written here when out maps to file descriptor 1 or 2 and the
    corresponding Windows handle is a console; in every other case the caller
    has to write s itself. Raises OSError if WriteConsoleW reports failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (presumably STD_OUTPUT_HANDLE / STD_ERROR_HANDLE - confirm against the
    # Windows API documentation)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Bind the needed kernel32 functions with explicit prototypes
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters WriteConsoleW actually wrote
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle counts as a console only if it is valid, is a character
        # device (ignoring the REMOTE flag) and GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters per call and stop in front of the
        # next non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
1070
1071
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr if out is None) and flush it

    On win32, when no explicit encoding is requested, the write is first
    offered to _windows_write_string. Otherwise s is encoded (ignoring
    unencodable characters) for binary streams and streams exposing a
    .buffer, and written as text to everything else.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    try_console = (
        sys.platform == 'win32' and encoding is None and hasattr(stream, 'fileno'))
    if try_console and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr
    if 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        enc = encoding or getattr(stream, 'encoding', None) or preferredencoding()
        stream.buffer.write(s.encode(enc, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
1092
1093
48ea9cea
PH
def bytes_to_intlist(bs):
    """Convert a byte string into the list of its byte values"""
    if not bs:
        return []
    first = bs[0]
    if isinstance(first, int):  # Python 3
        return list(bs)
    # Items are single characters here, so map them to their ordinals
    return [ord(ch) for ch in bs]
1101
c257baff 1102
def intlist_to_bytes(xs):
    """Pack a sequence of byte values (0-255) into a byte string"""
    if xs:
        return struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
1107
1108
c1c9a79c
PH
# Cross-platform file locking
# Both branches define the same pair of helpers:
#   _lock_file(f, exclusive) - lock the open file f, exclusively or shared
#   _unlock_file(f)          - release that lock again
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # Structure handed to LockFileEx / UnlockFileEx below
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low / high halves of the byte count passed to the calls below,
    # starting at offset 0 - intended to span the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object because _unlock_file passes the same
        # pointer to UnlockFileEx
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock, 0x0 a shared one
        # (presumably LOCKFILE_EXCLUSIVE_LOCK - confirm against the Windows API)
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Only valid after a preceding _lock_file(f, ...) call
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        # flock with LOCK_EX (exclusive) or LOCK_SH (shared)
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1172
1173
class locked_file(object):
    """File wrapper that holds a file lock while used as a context manager

    The file is opened right away in the constructor; the lock is taken on
    entering the with-block and released (and the file closed) on leaving it.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Everything but plain reading asks for an exclusive lock
        needs_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, needs_exclusive)
        except IOError:
            # Do not leak the already opened file if locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1203
1204
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1208
1209
def shell_quote(args):
    """Return args joined into one string with every argument shell-quoted

    Byte-string arguments are decoded with the filesystem encoding first.
    """
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        # shlex_quote (already imported from .compat) instead of the
        # undocumented pipes.quote: the pipes module is deprecated and
        # no longer exists in Python 3.13
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1219
1220
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is serialized as JSON and appended to url as a URL-encoded
    '__youtubedl_smuggle' fragment. """

    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
9d4660ca
PH
1227
1228
def unsmuggle_url(smug_url, default=None):
    """Split a URL into (url, data), where data is the '__youtubedl_smuggle'
    payload carried in the fragment; URLs without such a payload are
    returned as (smug_url, default)"""
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(encoded)
    return smug_url, default
02dbf93f
PH
1236
1237
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count as a human-readable string with a binary unit

    bytes may be a number or a numeric string; None yields 'N/A'.
    (The parameter name shadows the builtin; it is kept for callers.)
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the exponent inside the suffix table: values of 1024 ** 9 and
        # above are expressed in YiB instead of raising IndexError, and
        # fractions below one byte stay in B instead of wrapping to 'YiB'
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
f53c966a 1250
1c088fa8 1251
be64b5b0
PH
1252def parse_filesize(s):
1253 if s is None:
1254 return None
1255
1256 # The lower-case forms are of course incorrect and inofficial,
1257 # but we support those too
1258 _UNIT_TABLE = {
1259 'B': 1,
1260 'b': 1,
1261 'KiB': 1024,
1262 'KB': 1000,
1263 'kB': 1024,
1264 'Kb': 1000,
1265 'MiB': 1024 ** 2,
1266 'MB': 1000 ** 2,
1267 'mB': 1024 ** 2,
1268 'Mb': 1000 ** 2,
1269 'GiB': 1024 ** 3,
1270 'GB': 1000 ** 3,
1271 'gB': 1024 ** 3,
1272 'Gb': 1000 ** 3,
1273 'TiB': 1024 ** 4,
1274 'TB': 1000 ** 4,
1275 'tB': 1024 ** 4,
1276 'Tb': 1000 ** 4,
1277 'PiB': 1024 ** 5,
1278 'PB': 1000 ** 5,
1279 'pB': 1024 ** 5,
1280 'Pb': 1000 ** 5,
1281 'EiB': 1024 ** 6,
1282 'EB': 1000 ** 6,
1283 'eB': 1024 ** 6,
1284 'Eb': 1000 ** 6,
1285 'ZiB': 1024 ** 7,
1286 'ZB': 1000 ** 7,
1287 'zB': 1024 ** 7,
1288 'Zb': 1000 ** 7,
1289 'YiB': 1024 ** 8,
1290 'YB': 1000 ** 8,
1291 'yB': 1024 ** 8,
1292 'Yb': 1000 ** 8,
1293 }
1294
1295 units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
4349c07d
PH
1296 m = re.match(
1297 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
be64b5b0
PH
1298 if not m:
1299 return None
1300
4349c07d
PH
1301 num_str = m.group('num').replace(',', '.')
1302 mult = _UNIT_TABLE[m.group('unit')]
1303 return int(float(num_str) * mult)
be64b5b0
PH
1304
1305
caefb1de
PH
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name,
    or None if name is not in ENGLISH_MONTH_NAMES """

    if name in ENGLISH_MONTH_NAMES:
        return ENGLISH_MONTH_NAMES.index(name) + 1
    return None
1313
1314
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations, i.e. the first three letters of the month name;
    None if abbrev matches no month """

    for number, month in enumerate(ENGLISH_MONTH_NAMES, 1):
        if month[:3] == abbrev:
            return number
    return None
18258362
JMF
1323
1324
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' by '&amp;' in XML

    Ampersands that already start one of the predefined entities (amp, lt,
    gt, apos, quot) or a numeric character reference are left alone.
    """
    # Character references go up to U+10FFFF, i.e. 6 hexadecimal or
    # 7 decimal digits; at least one digit is required
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{1,6};|#[0-9]{1,7};)',
        '&amp;',
        xml_str)
e3946f98
PH
1331
1332
def setproctitle(title):
    """Try to set the process name to title via prctl from libc.so.6

    Does nothing when that library cannot be loaded or offers no prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte longer than
    # the title, so it is always NUL-terminated. A buffer of exactly
    # len(title_bytes) would hand prctl an unterminated string.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is presumably PR_SET_NAME - confirm against prctl(2)
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1346
1347
def remove_start(s, start):
    """Return s without the prefix start, or s itself if it has no such prefix"""
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1352
1353
2b9faf55
PH
def remove_end(s, end):
    """Return s without the suffix end, or s itself if it has no such suffix"""
    # An empty suffix has to be excluded explicitly: every string ends with
    # '', and s[:-0] is s[:0], which would return '' instead of s
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1358
1359
def url_basename(url):
    """Return the last segment of the path of url
    (surrounding slashes are ignored)"""
    path = compat_urlparse.urlparse(url).path
    segments = path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
1363
1364
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is HEAD"""

    def get_method(self):
        return 'HEAD'
7217e148
PH
1368
1369
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, multiplied by invscale and floor-divided by scale

    If get_attr is given, the attribute of that name is read from v first.
    default is returned when the value is None or the empty string.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    return int(v) * invscale // scale
1377
9572013d 1378
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted to compat_str, or default if v is None"""
    if v is None:
        return default
    return compat_str(v)
1381
9732d77e
PH
1382
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: commas, dots and plus signs
    are dropped before the conversion """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
608d11f5
PH
1389
1390
9732d77e
PH
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float, multiplied by invscale and divided by scale;
    default is returned if v is None"""
    if v is None:
        return default
    return float(v) * invscale / scale
43f775e4
PH
1393
1394
def parse_duration(s):
    """Parse a duration such as '9:12:43', '3h11m53s' or '87 Min.' into seconds

    Returns None if s is not a string or is not recognized. The result is a
    float when minutes/hours are given on their own or when the seconds have
    a fractional part, otherwise an int.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None

    # A lone (possibly fractional) minute or hour count is converted directly
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)

    # Integer components and the number of seconds each unit is worth
    seconds_per_unit = (
        ('secs', 1),
        ('mins_reversed', 60),
        ('mins', 60),
        ('hours', 60 * 60),
        ('hours_reversed', 60 * 60),
        ('days', 24 * 60 * 60),
    )
    total = 0
    for group, factor in seconds_per_unit:
        value = m.group(group)
        if value:
            total += int(value) * factor
    if m.group('ms'):
        total += float(m.group('ms'))
    return total
91d7d0b3
JMF
1439
1440
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the existing extension of filename

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the complete filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
1447
1448
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of filename by ext

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the complete filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
1454
1455
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if the binary could not be started. """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1464
1465
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The combined stdout/stderr output of running exe with args is handed
    to detect_exe_version. """
    try:
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = proc.communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
1479
1480
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the output of a program

    version_re must contain one group capturing the version; by default the
    word following 'version' is taken. unrecognized is returned when the
    pattern is not found.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
1490
1491
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()"""

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
1496
9c44d242
PH
1497
class OnDemandPagedList(PagedList):
    """Paged list that fetches its pages one by one while slicing

    pagefunc(pagenum) must return an iterable with the entries of that
    (0-based) page; pagesize is the number of entries on a full page.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices start (inclusive) to end
        (exclusive, None = everything) as a list"""
        pagesize = self._pagesize
        collected = []
        for pagenum in itertools.count(start // pagesize):
            page_first = pagenum * pagesize
            next_first = page_first + pagesize
            if start >= next_first:
                continue

            entries = list(self._pagefunc(pagenum))

            # Only the page containing `start` is entered in the middle
            skip = start % pagesize if page_first <= start < next_first else 0

            # Only the page containing `end` is cut off
            if end is not None and page_first <= end <= next_first:
                stop = ((end - 1) % pagesize) + 1
            else:
                stop = None

            if skip != 0 or stop is not None:
                entries = entries[skip:stop]
            collected.extend(entries)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(entries) + skip < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == next_first:
                break
        return collected
81c2f20b
PH
1539
1540
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list whose number of pages is known beforehand

    pagefunc(pagenum) must return an iterable with the entries of that
    (0-based) page; pagecount is the total number of pages and pagesize the
    number of entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices start (inclusive) to end
        (exclusive, None = everything) as a list"""
        first_page, offset = divmod(start, self._pagesize)
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start

        collected = []
        for pagenum in range(first_page, stop_page):
            entries = list(self._pagefunc(pagenum))
            if offset:
                # Only the very first page is entered in the middle
                entries = entries[offset:]
                offset = None
            if remaining is not None:
                if len(entries) >= remaining:
                    # This page completes the requested slice
                    collected.extend(entries[:remaining])
                    break
                remaining -= len(entries)
            collected.extend(entries)
        return collected
1568
1569
def uppercase_escape(s):
    """Replace every \\UXXXXXXXX escape sequence in s by the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _unescape, s)
0fe2ff78
YCH
1576
1577
def lowercase_escape(s):
    """Replace every \\uXXXX escape sequence in s by the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _unescape, s)
b53466e1 1584
d05cfe06
S
1585
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 text strings are passed to quote() as UTF-8 bytes
    encode_first = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if encode_first:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1591
1592
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    Path, params, query and fragment are escaped with escape_rfc3986;
    the remaining components are left untouched."""
    parts = compat_urllib_parse_urlparse(url)
    escaped = parts._replace(
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment),
    )
    return escaped.geturl()
1602
# struct_pack / struct_unpack behave like struct.pack / struct.unpack but
# always accept a text format specification
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(fmt, *args)

    def struct_unpack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(fmt, *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1619
1620
def read_batch_urls(batch_fd):
    """Read the URLs of a batch file, one per line

    batch_fd is an iterable of text or byte lines and is closed afterwards.
    Surrounding whitespace and a leading byte order mark are removed; empty
    lines and lines starting with '#', ';' or ']' are skipped.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # '\xef\xbb\xbf' is the UTF-8 BOM as it appears when its three bytes
        # were turned into one character each; '\ufeff' is what a proper
        # UTF-8 decode (such as the one above) leaves behind
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1635
1636
def urlencode_postdata(*args, **kargs):
    """URL-encode the given data (same arguments as urlencode) and return
    the result as ASCII bytes"""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1639
1640
0990305d
PH
# etree_iter(node) iterates over the elements of an ElementTree node
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1645
1646
bcf89ce6
PH
def parse_xml(s):
    """Parse the XML text s and return its root element

    Doctype declarations are ignored."""
    class _DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=_DoctypeIgnoringBuilder())
    kwargs = {}
    if sys.version_info >= (2, 7):
        kwargs['parser'] = parser
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            if node.text is not None and not isinstance(node.text, compat_str):
                node.text = node.text.decode('utf-8')
    return tree
e68301af
PH
1662
1663
a1a530b0
PH
# Rating label -> age limit; parse_age_limit falls back to this table for
# inputs that are not plain numbers
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
1671
1672
146c80e2
S
def parse_age_limit(s):
    """Parse an age limit such as '18', '16+' or a US_RATINGS label;
    returns None for None and for anything not recognized"""
    if s is None:
        return None
    found = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if found:
        return int(found.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1678
1679
def strip_jsonp(code):
    """Remove a JSONP wrapper (callback name, parentheses, optional trailing
    semicolon and // comments) and return what is inside; code that is not
    wrapped that way is returned unchanged"""
    jsonp_wrapper = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_wrapper, r'\1', code)
478c2c61
PH
1683
1684
e05f6939
PH
def js_to_json(code):
    """Convert a JavaScript object literal into JSON text

    Single-quoted strings and bare identifiers are turned into double-quoted
    strings and commas directly in front of a closing bracket or brace are
    dropped. Double-quoted strings and true/false/null are kept as they are.
    """
    # What has to change inside a string when its quotes become double quotes
    requote = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            inner = token[1:-1]
            token = re.sub(
                r"\\\\|\\'|\"", lambda esc: requote[esc.group(0)], inner)
        return '"%s"' % token

    converted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*[\]}])', lambda m: m.group(1), converted)
1708
1709
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values:
    the returned function maps an id to its position in quality_ids,
    or to -1 if it is not listed """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1718
acd69589
PH
1719
1720DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1721
a020a0dc
PH
1722
def limit_length(s, length):
    """ Add ellipses to overly long strings: s is cut down to length
    characters (including the '...'), None is passed through """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1731
1732
def version_tuple(v):
    """Split a version string at dots and hyphens into a tuple of ints"""
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
1735
1736
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit

    A missing or unparsable version counts as outdated unless assume_new
    is set."""
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        outdated = not assume_new
    return outdated
732ea2f0
PH
1744
1745
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U, i.e. if this module
    was loaded by a zipimporter or the interpreter is frozen """
    from zipimport import zipimporter

    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
1751
1752
def args_to_str(args):
    """Join args into a single string, shell-quoting each argument"""
    # Get a short string representation for a subprocess command
    quoted = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
2ccd1b10
PH
1756
1757
c460bdd5
PH
def mimetype2ext(mt):
    """Map a MIME type to a file extension: the part after the last '/',
    translated for a few known subtypes"""
    subtype = mt.rpartition('/')[2]

    known_subtypes = {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
        'ttml+xml': 'ttml',
    }
    return known_subtypes.get(subtype, subtype)
1766
1767
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """Guess the file extension from the headers of an opened URL

    The file name of a Content-Disposition attachment takes precedence;
    otherwise the extension is derived from the Content-Type."""
    try:
        url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        def getheader(h):
            return url_handle.headers[h]

    disposition = getheader('Content-Disposition')
    if disposition:
        found = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if found:
            ext = determine_ext(found.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1784
1785
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked, i.e. iff both limits
    are set and age_limit is below content_limit """

    if age_limit is None or content_limit is None:
        # No limit set, or content available for everyone
        return False
    return age_limit < content_limit
61ca9a80
PH
1794
1795
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8
    otherwise); the result is truthy iff the text starts with '<' after
    optional whitespace. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if text is None:
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
1814
1815
def determine_protocol(info_dict):
    """Return the download protocol for a format dictionary

    An explicit 'protocol' entry wins; otherwise the protocol is derived
    from the start of the URL, then its extension, then its scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1836
1837
1838def render_table(header_row, data):
1839 """ Render a list of rows, each as a list of values """
1840 table = [header_row] + data
1841 max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
1842 format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
1843 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
1844
1845
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against the dictionary dct

    filter_part is either a comparison ('key OP value', OP being one of
    < <= > >= = !=, optionally followed by '?') or a presence test ('key' /
    '!key'). Raises ValueError for expressions that cannot be parsed, for
    string values used with an ordering operator and for invalid numbers.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('strval') is not None:
            # Strings can only be tested for (in)equality
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('strval')
        else:
            # Plain integer first, then a file size with unit, then a file
            # size whose trailing 'B' was left out
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            # Missing values only pass if the operator was followed by '?';
            # note that this returns the matched text (or None), not a bool
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Not a comparison - try the presence tests
    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
1903
1904
def match_str(filter_str, dct):
    """Filter a dictionary with a simple string syntax.

    filter_str is split on '&' and every resulting part is checked with
    _match_one. Returns True when all parts pass and False as soon as one
    of them fails.
    """
    for single_filter in filter_str.split('&'):
        if not _match_one(single_filter, dct):
            return False
    return True
1910
1911
def match_filter_func(filter_str):
    """Build a callable that checks an info dict against filter_str.

    The returned function yields None when the info dict passes the filter
    and otherwise a message explaining that the video is being skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Name the video by title, falling back to its id and finally
            # to the generic word 'video'.
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
1920
1921
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP time expression to a number of seconds.

    Accepted forms are an offset such as '12', '12.5' or '12.5s' and a
    clock time 'H:MM:SS' with optional fractional seconds. An empty or
    missing expression yields 0.0; anything else yields None.
    """
    if not time_expr:
        return 0.0

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if clock_match is None:
        return None
    hours, minutes, seconds = clock_match.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds)
1933
1934
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode 'HH:MM:SS,mmm'.

    The value is rounded to whole milliseconds before being split into
    fields. Taking the fractional part of a float and truncating it (as
    '%d' does) loses a millisecond for values such as 1.001, whose binary
    representation is slightly below the decimal one.
    """
    total_ms = int(round(seconds * 1000))
    hours, rest_ms = divmod(total_ms, 3600 * 1000)
    minutes, rest_ms = divmod(rest_ms, 60 * 1000)
    secs, millis = divmod(rest_ms, 1000)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
bf6427d2
YCH
1937
1938
def dfxp2srt(dfxp_data):
    """Convert a DFXP/TTML subtitle document to SRT text.

    dfxp_data is the document as a text string; it is encoded to UTF-8
    before being parsed. Paragraph elements are searched in the 'ttml'
    namespace first, then in 'ttaf1', then without a namespace.

    Every paragraph becomes one SRT cue. A paragraph whose 'begin' cannot
    be parsed, or that has neither a usable 'end' nor a usable 'dur', is
    skipped instead of aborting the whole conversion; cues are numbered
    consecutively in output order. A missing 'begin' is treated as 0.0,
    which is what parse_dfxp_time_expr returns for an empty expression.

    Raises ValueError when the document contains no paragraph at all.
    """
    # NOTE(review): xpath_with_ns presumably expands 'prefix:tag' to the
    # '{uri}tag' form ElementTree uses -- confirm against its definition.
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
    })

    # The tag names do not change between nodes, so resolve them once.
    br_tags = (_x('ttml:br'), _x('ttaf1:br'), 'br')
    span_tags = (_x('ttml:span'), _x('ttaf1:span'), 'span')

    def parse_node(node):
        str_or_empty = functools.partial(str_or_none, default='')

        out = str_or_empty(node.text)

        for child in node:
            if child.tag in br_tags:
                out += '\n' + str_or_empty(child.tail)
            elif child.tag in span_tags:
                out += str_or_empty(parse_node(child))
            else:
                # Unknown elements are kept as serialized markup.
                out += str_or_empty(xml.etree.ElementTree.tostring(child))

        return out

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    for para in paras:
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        if begin_time is None:
            # Unparseable start time: the cue cannot be placed.
            continue
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        if not end_time:
            duration = parse_dfxp_time_expr(para.attrib.get('dur'))
            if not duration:
                # Neither 'end' nor 'dur' is usable.
                continue
            end_time = begin_time + duration
        out.append('%d\n%s --> %s\n%s\n\n' % (
            len(out) + 1,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
1979
1980
66e289ba
S
def cli_option(params, command_option, param):
    """Return [command_option, value] for params[param], or [] if unset.

    "Unset" means the key is missing from params or its value is None.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
1984
1985
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render the boolean params[param] as a command-line option.

    Without a separator the result is [command_option, text]; with one it
    is the single argument command_option + separator + text, where text is
    true_value or false_value. The parameter must be a bool (asserted).
    """
    param = params.get(param)
    assert isinstance(param, bool)
    rendered = true_value if param else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
1992
1993
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value.

    Any other value, including a missing key, yields an empty list.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
1997
1998
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra arguments stored under params[param].

    When the key is missing or None, default is returned. A list default is
    returned as a shallow copy, so callers that extend the result cannot
    modify the shared default-argument list (or the caller's own default)
    and leak arguments into later calls. A present value must be a list
    (asserted) and is returned as is.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
2005
2006
39672624
YCH
class ISO639Utils(object):
    """Lookups between two-letter and three-letter language codes."""

    # Two-letter code -> three-letter code.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T.

        Only the first two characters of code are looked at; None is
        returned when they are not a known code.
        """
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1.

        Returns None when no entry of the table maps to code.
        """
        matches = (
            two_letter
            for two_letter, three_letter in cls._lang_map.items()
            if three_letter == code)
        return next(matches, None)
2207
2208
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of full country names by two-letter country code."""

    # Upper-case two-letter code -> country name.
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Return the full country name for a two-letter country code.

        The lookup ignores the case of code; None is returned for codes
        that are not in the table.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
2467
2468
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler whose proxy can be overridden for a single request.

    A request carrying a 'Ytdl-request-proxy' header is sent through the
    proxy named there; the special value '__noproxy__' disables proxying
    for that request.
    """

    def __init__(self, proxies=None):
        def _default_opener(scheme):
            # Same calling convention as the openers the base class
            # installs, but defaulting to "no proxy".
            def _open(r, proxy='__noproxy__', type=scheme, meth=self.proxy_open):
                return meth(r, proxy, type)
            return _open

        # Set default handlers first so that http and https requests always
        # pass through proxy_open; the base initializer runs afterwards.
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme, _default_opener(scheme))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Open req through proxy unless a per-request override applies.

        Returns None when the effective proxy is '__noproxy__'; otherwise
        the result of the base class proxy_open.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            # The header is internal bookkeeping: remove it before use.
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy != '__noproxy__':
            return compat_urllib_request.ProxyHandler.proxy_open(
                self, req, proxy, type)
        return None  # No Proxy