]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[clyp] Add extractor
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
912b38b4 7import calendar
676eb3f2 8import codecs
62e609ab 9import contextlib
e3946f98 10import ctypes
c496ca96
PH
11import datetime
12import email.utils
f45c185f 13import errno
be4a824d 14import functools
d77c3dfd 15import gzip
b7ab0590 16import itertools
03f9daab 17import io
f4bfd65f 18import json
d77c3dfd 19import locale
02dbf93f 20import math
347de493 21import operator
d77c3dfd 22import os
4eb7f1d1 23import pipes
c496ca96 24import platform
d77c3dfd 25import re
13ebea79 26import ssl
c496ca96 27import socket
b53466e1 28import struct
1c088fa8 29import subprocess
d77c3dfd 30import sys
181c8655 31import tempfile
01951dda 32import traceback
bcf89ce6 33import xml.etree.ElementTree
d77c3dfd 34import zlib
d77c3dfd 35
8c25f81b 36from .compat import (
8f9312c3 37 compat_basestring,
8c25f81b 38 compat_chr,
8c25f81b 39 compat_html_entities,
be4a824d 40 compat_http_client,
c86b6142 41 compat_kwargs,
8c25f81b 42 compat_parse_qs,
be4a824d 43 compat_socket_create_connection,
8c25f81b
PH
44 compat_str,
45 compat_urllib_error,
46 compat_urllib_parse,
47 compat_urllib_parse_urlparse,
48 compat_urllib_request,
49 compat_urlparse,
7d4111ed 50 shlex_quote,
8c25f81b 51)
4644ac55
S
52
53
468e2e92
FV
# The re module exposes no public name for the compiled-pattern class,
# so derive it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default headers; YoutubeDLHandler.http_request adds each of these to a
# request that does not already carry it.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Sentinel for "no default supplied", so that None stays a usable default.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
71
72
def preferredencoding():
    """Get preferred encoding.

    Returns locale.getpreferredencoding() if that names a codec that can
    actually encode text, and 'UTF-8' otherwise.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or broken name raises here.
        'TEST'.encode(encoding)
        return encoding
    except Exception:
        return 'UTF-8'
d77c3dfd 86
f4bfd65f 87
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)

    if sys.version_info < (3, 0) and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes here, but NamedTemporaryFile
        # fails on non-ASCII names unless it is given unicode objects.
        def path_basename(f):
            return os.path.basename(f).decode(fs_encoding)

        def path_dirname(f):
            return os.path.dirname(f).decode(fs_encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # The temporary file lives next to the target so os.rename stays on
    # one filesystem.
    tmp_args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        tmp_args['mode'] = 'wb'
    else:
        tmp_args['mode'] = 'w'
        tmp_args['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file, then re-raise.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
140
141
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val:
            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        if val is None:
            # Only require that the attribute is present.
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (manual filtering for Python 2.6) """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        for candidate in node.findall(xpath):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
163
d7e66d39
JMF
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path to '{namespace}tag' using ns_map.

    Steps without a prefix are kept unchanged. An unknown prefix raises
    KeyError.
    """
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        ns, tag = parts
        return '{%s}%s' % (ns_map[ns], tag)

    return '/'.join(expand(step) for step in path.split('/'))
178
d77c3dfd 179
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node matching xpath.

    If nothing matches: return default when one was given, otherwise raise
    ExtractorError when fatal is set, otherwise return None. name replaces
    the xpath in the error message.
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    found = node.find(xpath)
    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element %s' % (xpath if name is None else name))
    return None
194
195
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element under node matching xpath.

    A missing element is handled by xpath_element. An element without text
    follows the same rules: default if given, ExtractorError if fatal,
    otherwise None.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # xpath_element may have handed back None or the caller's default.
    if element is None or element == default:
        return element
    if element.text is not None:
        return element.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
a41fb80c
S
209
210
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    If no such element exists: return default when one was given, otherwise
    raise ExtractorError when fatal is set, otherwise return None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
222
223
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper: an ID lookup is an attribute lookup on "id".
    content = get_element_by_attribute('id', id, html)
    return content
227
12ea2f30 228
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    # Verbose-mode pattern: an opening tag that carries attribute=value
    # among its attributes, the content up to the first matching closing
    # tag (non-greedy), then that closing tag.
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    found = re.search(pattern, html)
    if not found:
        return None

    content = found.group('content')
    # Content that starts with a quote has its first and last character
    # dropped.
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 250
9e6dd238
FV
251
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Source newlines are not meaningful; <br> and </p><p> boundaries are.
    text = html.replace('\n', ' ')
    text = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop every remaining tag, then decode the entities.
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
9e6dd238
FV
267
268
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    '-' means standard output (its binary buffer where available). If
    opening fails for a reason other than EACCES, the name is passed through
    sanitize_path and, if that changed it, opened again; an error from this
    second attempt propagates to the caller.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = getattr(sys.stdout, 'buffer', sys.stdout)
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        stream = open(encodeFilename(alt_filename), open_mode)
        return (stream, alt_filename)
d77c3dfd
FV
299
300
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable date string.
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 308
5f6a1245 309
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        # IDs only get the per-character substitution.
        return result

    # Collapse runs of underscores and trim them from both ends.
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 346
5f6a1245 347
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform s is returned unchanged. On Windows the path is
    normalized and, in every component except '.' and '..', each of the
    characters / < > : " | \\ ? * and a trailing dot is replaced with '#'.
    A drive or UNC prefix is kept as is.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        # Older splitdrive does not recognise UNC prefixes.
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # The remainder starts with a separator, so the first piece is empty.
        norm_path.pop(0)
    # Raw string: the original non-raw literal relied on the invalid escape
    # '\.', which newer Python versions warn about. The pattern is unchanged.
    forbidden = r'(?:[/<>:"\|\\?\*]|\.$)'
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(forbidden, '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
364
365
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # A list is used for membership so unhashable elements work too; the
    # first occurrence of each element keeps its position.
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 373
912b38b4 374
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities are looked up in
    compat_html_entities.name2codepoint; '#123' and '#x7b' forms are decoded
    numerically. Anything that cannot be decoded, including a numeric
    reference outside the valid code point range, is returned as the literal
    '&entity;'.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() needs the '0x' prefix form for base 16 here.
            numstr = '0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Code point out of range (e.g. '&#2013266066;'): keep the
            # entity text instead of crashing the whole unescape.
            pass

    # Unknown entity in name, return its literal representation
    return ('&%s;' % entity)
4e408e47
PH
393
394
def unescapeHTML(s):
    """Replace every '&...;' entity in s using _htmlentity_transform.

    None is passed through; any other input must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', _decode, s)
d77c3dfd 402
8bf48f23 403
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and file names."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()

    fs_encoding = sys.getfilesystemencoding()
    return 'utf-8' if fs_encoding is None else fs_encoding
414
415
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if for_subprocess:
        use_unicode_api = False
    else:
        use_unicode_api = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if use_unicode_api:
        return s

    # Characters the target encoding cannot represent are dropped.
    encoding = get_subprocess_encoding()
    return s.encode(encoding, 'ignore')
434
435
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename: decode a Python 2 byte string to text.

    On Python 3, and for anything that is not a bytes object, b is returned
    unchanged. for_subprocess is accepted but not consulted here.
    """
    needs_decoding = sys.version_info < (3, 0) and isinstance(b, bytes)
    if not needs_decoding:
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 445
f07b74fc
PH
446
def encodeArgument(s):
    """Encode a command-line argument via encodeFilename(..., True)."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)

    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
454
455
aa49acd1
S
def decodeArgument(b):
    """Decode a command-line argument; decodeFilename in subprocess mode."""
    return decodeFilename(b, for_subprocess=True)
458
459
8271226a
PH
def decodeOption(optval):
    """Return optval as text, decoding bytes with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is None:
        return None

    decoded = optval
    if isinstance(decoded, bytes):
        decoded = decoded.decode(preferredencoding())

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 468
5f6a1245 469
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used. The boundaries are inclusive, so
    exactly one hour is '1:00:00' and exactly one minute is '1:00' (the
    strict comparisons used previously produced '60:00' and '60').
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
477
a0ddb8a2 478
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler suited to the running Python's ssl module.

    Certificate verification is switched off when
    params['nocheckcertificate'] is true.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if opts_no_check_certificate:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 502
732ea2f0 503
08f2a92c
JMF
def bug_reports_message():
    """Return the standard "please report this issue" suffix for error messages."""
    update_cmd = (
        'type youtube-dl -U to update'
        if ytdl_is_updateable()
        else 'see https://yt-dl.org/update on how to update')
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))
513
514
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Raised while a network or availability error is being handled:
        # treat it as expected regardless of what the caller passed.
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 542
1c256f70 543
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected ExtractorError for a URL that is not supported; keeps the URL in .url."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
549
550
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
554
555
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
568
569
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
577
578
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # The message is also reachable as .msg.
        self.msg = msg
d77c3dfd 588
5f6a1245 589
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
593
594
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
602
603
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Give the exception a readable message; without it str() shows
        # only the bare (downloaded, expected) tuple.
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 616
5f6a1245 617
c5a59d93 618def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
619 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
620 # expected HTTP responses to meet HTTP/1.0 or later (see also
621 # https://github.com/rg3/youtube-dl/issues/6727)
622 if sys.version_info < (3, 0):
5a1a2e94 623 kwargs[b'strict'] = True
be4a824d
PH
624 hc = http_class(*args, **kwargs)
625 source_address = ydl_handler._params.get('source_address')
626 if source_address is not None:
627 sa = (source_address, 0)
628 if hasattr(hc, 'source_address'): # Python 2.7+
629 hc.source_address = sa
630 else: # Python 2.6
631 def _hc_connect(self, *args, **kwargs):
632 sock = compat_socket_create_connection(
633 (self.host, self.port), self.timeout, sa)
634 if is_https:
d7932313
PH
635 self.sock = ssl.wrap_socket(
636 sock, self.key_file, self.cert_file,
637 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
638 else:
639 self.sock = sock
640 hc.connect = functools.partial(_hc_connect, hc)
641
642 return hc
643
644
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params is kept so _create_http_connection can read options such
        # as 'source_address' from it.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        # Connections are built by _create_http_connection (is_https=False).
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data: try a raw stream first, then a zlib-wrapped one."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, setting .code by hand when the constructor does not take it."""
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Prepare an outgoing request: escape the URL, add std_headers, honour the no-compression marker."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
            new_req = req_type(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
            new_req.timeout = req.timeout
            req = new_req

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)
        # The marker header is internal: drop it, together with
        # Accept-encoding, before the request is sent.
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress gzip/deflate bodies and percent-encode redirect Location headers."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if none of the
                # attempts decodes, re-raise the first error.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    # Undo the iso-8859-1 decoding and reinterpret the raw
                    # bytes as UTF-8.
                    location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic goes through the same request/response processing.
    https_request = http_request
    https_response = http_response
bf50b038 768
5de90176 769
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that creates its connections through _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Forward whichever of these the base handler set on this Python
        # version: _context (python > 2.6), _check_hostname (python 3.x).
        extra = {}
        for attr, kwarg in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                extra[kwarg] = getattr(self, attr)

        conn_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(conn_factory, req, **extra)
be4a824d
PH
785
786
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies to HTTPS requests and responses."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        #
        # The workaround itself is currently disabled and kept for reference:
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base = compat_urllib_request.HTTPCookieProcessor
        return base.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
809
810
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    if timezone is None:
        # A trailing 'Z' or '+HH:MM' / '-HHMM' offset, optionally preceded
        # by fractional seconds, is cut off the string; without an explicit
        # offset the time is taken as UTC.
        tz_match = re.search(
            r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        timezone = datetime.timedelta()
        if tz_match:
            date_str = date_str[:-len(tz_match.group(0))]
            if tz_match.group('sign'):
                sign = 1 if tz_match.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=sign * int(tz_match.group('hours')),
                    minutes=sign * int(tz_match.group('minutes')))

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    # Subtracting the offset converts the local time to UTC.
    utc_dt = datetime.datetime.strptime(date_str, date_format) - timezone
    return calendar.timegm(utc_dt.timetuple())
835
836
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2, so strip a trailing
    # offset, unless the whole string is a D-M-YYYY date whose tail merely
    # looks like one.
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    common_formats = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    if day_first:
        ambiguous_formats = [
            '%d-%m-%Y',
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ]
    else:
        ambiguous_formats = [
            '%m-%d-%Y',
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ]

    upload_date = None
    # Every format is tried; if several match, the last one wins.
    for fmt in common_formats + ambiguous_formats:
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Last resort: RFC 2822 style dates.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
900
5f6a1245 901
def determine_ext(url, default_ext='unknown_video'):
    """ Guess the file extension from a URL.

    Returns the text after the last dot of the part before the first '?'
    if it is purely alphanumeric, default_ext otherwise (also for None). """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 910
5f6a1245 911
def subtitles_filename(filename, sub_lang, sub_format):
    """ Replace the last extension of filename by '<sub_lang>.<sub_format>' """
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
d4051a8e 914
5f6a1245 915
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    one of the words now, today or yesterday, or a relative expression
    (now|today)[+-][0-9]+(day|week|month|year)(s)?

    Raises ValueError (from strptime) if the string matches none of these."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # timedelta knows neither months nor years, so approximate them
        # as 30 and 365 days respectively
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
5f6a1245
JW
943
944
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.
    Strings that are not exactly eight digits are returned unchanged."""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if mobj is None:
        return date_str
    return '%s-%s-%s' % mobj.groups()
953
5f6a1245 954
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; a missing bound defaults to the earliest/latest
        date representable by datetime"""
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range (both bounds inclusive)"""
        if not isinstance(date, datetime.date):
            # Anything that is not already a date is parsed as a date string
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
c496ca96
PH
984
985
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
994
995
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> id passed to GetStdHandle
    std_handle_ids = {
        1: -11,
        2: -12,
    }

    try:
        fd = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # Either the output stream doesn't have a fileno (it's virtual),
        # or it is some strange Windows pseudo file
        return False
    if fd not in std_handle_ids:
        return False

    wt = ctypes.wintypes
    kernel32 = ctypes.windll.kernel32

    get_std_handle = ctypes.WINFUNCTYPE(wt.HANDLE, wt.DWORD)(
        (b"GetStdHandle", kernel32))
    console = get_std_handle(std_handle_ids[fd])

    write_console = ctypes.WINFUNCTYPE(
        wt.BOOL, wt.HANDLE, wt.LPWSTR, wt.DWORD,
        ctypes.POINTER(wt.DWORD), wt.LPVOID)((b"WriteConsoleW", kernel32))
    chars_written = wt.DWORD(0)

    get_file_type = ctypes.WINFUNCTYPE(wt.DWORD, wt.DWORD)(
        (b"GetFileType", kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    get_console_mode = ctypes.WINFUNCTYPE(
        wt.BOOL, wt.HANDLE, ctypes.POINTER(wt.DWORD))(
        (b"GetConsoleMode", kernel32))
    INVALID_HANDLE_VALUE = wt.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        if (get_file_type(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
            return True
        return get_console_mode(handle, ctypes.byref(wt.DWORD())) == 0

    if not_a_console(console):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character outside the BMP, or len(text)
        for idx, ch in enumerate(text):
            if ord(ch) > 0xffff:
                return idx
        return len(text)

    while s:
        # Write at most 1024 characters at once, stopping in front of the
        # next non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = write_console(
            console, s, count if count else 2,
            ctypes.byref(chars_written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert chars_written.value == 2
            s = s[1:]
        else:
            assert chars_written.value > 0
            s = s[chars_written.value:]
    return True
1069
1070
def write_string(s, out=None, encoding=None):
    """ Write the text s to out (sys.stderr by default) and flush it.

    Depending on the stream, s is written as text or encoded first
    (characters that cannot be encoded are dropped). """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    # Python 2 lies about mode of sys.stderr, so always write bytes there
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream on top of a binary buffer: encode ourselves and
        # write to the buffer directly
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
1091
1092
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string into a list of integer byte values """
    if not bs:
        return []
    if not isinstance(bs[0], int):
        # Python 2: indexing a byte string yields 1-character strings
        return [ord(c) for c in bs]
    return list(bs)  # Python 3
1100
c257baff 1101
def intlist_to_bytes(xs):
    """ Pack a sequence of integers into a byte string, one unsigned
    byte ('B') per element """
    return struct_pack('%dB' % len(xs), *xs) if xs else b''
c38b1e77
PH
1106
1107
c1c9a79c
PH
# Cross-platform file locking: _lock_file(f, exclusive) and _unlock_file(f)
# are implemented with LockFileEx/UnlockFileEx on Windows and with
# fcntl.flock everywhere else
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object because _unlock_file needs the same
        # structure again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            handle, flags, 0, whole_low, whole_high,
            f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
        fcntl.flock(f, operation)

    def _unlock_file(f):
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1171
1172
class locked_file(object):
    """ Context manager around a file that is locked while the with block
    runs: shared for mode 'r', exclusive for 'a' and 'w' """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Readers share the lock, writers need it exclusively
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the open file if locking failed
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Close the file even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1202
1203
4644ac55
S
def get_filesystem_encoding():
    """ Return the filesystem encoding reported by sys, or 'utf-8' if
    none is reported """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1207
1208
def shell_quote(args):
    """ Quote every argument for the shell and join them with spaces """
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join([pipes.quote(as_text(a)) for a in args])
9d4660ca
PH
1218
1219
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded and appended as a URL-encoded fragment. """

    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
9d4660ca
PH
1226
1227
def unsmuggle_url(smug_url, default=None):
    """ Split a URL made by smuggle_url into (url, data).

    URLs without smuggled data are returned as (smug_url, default). """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
02dbf93f
PH
1235
1236
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count with a binary unit suffix and two decimals
    (e.g. '1.50KiB'); None yields 'N/A' """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # The exponent is the number of times 1024 fits into the value
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    scaled = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (scaled, suffixes[exponent])
f53c966a 1249
1c088fa8 1250
be64b5b0
PH
def parse_filesize(s):
    """ Parse a string like '1.5 MiB' into a number of bytes (int).

    Returns None if s is None or does not start with a number followed
    by a known unit. A comma is accepted as decimal separator. """
    if s is None:
        return None

    # <X>iB and the lower-case <x>B are binary (powers of 1024), <X>B and
    # <X>b are decimal (powers of 1000). The lower-case forms are of
    # course incorrect and unofficial, but we support those too
    _UNIT_TABLE = {'B': 1, 'b': 1}
    for power, prefix in enumerate('KMGTPEZY', 1):
        _UNIT_TABLE[prefix + 'iB'] = 1024 ** power
        _UNIT_TABLE[prefix + 'B'] = 1000 ** power
        _UNIT_TABLE[prefix.lower() + 'B'] = 1024 ** power
        _UNIT_TABLE[prefix + 'b'] = 1000 ** power

    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if mobj is None:
        return None

    number = float(mobj.group('num').replace(',', '.'))
    return int(number * _UNIT_TABLE[mobj.group('unit')])
be64b5b0
PH
1303
1304
caefb1de
PH
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name,
    or None if the name is unknown """

    if name in ENGLISH_MONTH_NAMES:
        return ENGLISH_MONTH_NAMES.index(name) + 1
    return None
1312
1313
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations, or None if the abbreviation is unknown """

    # An abbreviation is the first three letters of the full month name
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev in abbreviations:
        return abbreviations.index(abbrev) + 1
    return None
18258362
JMF
1322
1323
def fix_xml_ampersands(xml_str):
    """Replace every '&' in XML by '&amp;', except those that already
    start one of the predefined entities or a numeric character reference"""
    bare_ampersand_re = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand_re, '&amp;', xml_str)
e3946f98
PH
1330
1331
def setproctitle(title):
    """ Set the name of the current process to title via prctl.

    This is best-effort: nothing happens if libc.so.6 cannot be loaded
    or the loaded libc offers no prctl. """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes ctypes allocate one
    # extra byte, so prctl sees a NUL-terminated string. A buffer of
    # exactly len(title_bytes) would carry no terminator.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1345
1346
def remove_start(s, start):
    """ Return s without the prefix start; s is returned unchanged if it
    does not begin with start """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1351
1352
2b9faf55
PH
def remove_end(s, end):
    """ Return s without the suffix end; s is returned unchanged if it
    does not finish with end """
    # An empty suffix must be special-cased: every string ends with '',
    # but s[:-0] is s[:0], i.e. the empty string
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1357
1358
def url_basename(url):
    """ Return the last segment of the path component of url """
    url_path = compat_urlparse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
1362
1363
class HEADRequest(compat_urllib_request.Request):
    """ A request that reports HEAD as its HTTP method """

    def get_method(self):
        return 'HEAD'
7217e148
PH
1367
1368
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to an int, returning default if that is not possible.

    If get_attr is given, the attribute of that name is read from v first
    (a missing attribute counts as None). None and '' yield default. The
    result is int(v) * invscale // scale. """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # ValueError: unparseable string, TypeError: unsupported type
        # (e.g. a list), OverflowError: infinite float
        return default
9732d77e 1381
9572013d 1382
40a90862
JMF
def str_or_none(v, default=None):
    """ Return v converted to compat_str, or default if v is None """
    if v is None:
        return default
    return compat_str(v)
1385
9732d77e
PH
1386
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: commas, dots and plus
    signs are dropped before the conversion. None yields None. """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
608d11f5
PH
1393
1394
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, returning default if that is not possible.

    None yields default; the result is float(v) * invscale / scale. """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        # ValueError: unparseable string, TypeError: unsupported type
        # (e.g. a list or dict)
        return default
43f775e4
PH
1402
1403
def parse_duration(s):
    """ Parse a duration string into a number of seconds.

    Returns None if s is not a string or has no recognized format. The
    result is a float if the input had a fractional part or was given
    in minutes or hours only, otherwise an int. """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    mobj = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if mobj is None:
        return None

    # A lone (possibly fractional) amount of minutes or hours
    if mobj.group('only_mins'):
        return float_or_none(mobj.group('only_mins'), invscale=60)
    if mobj.group('only_hours'):
        return float_or_none(mobj.group('only_hours'), invscale=60 * 60)

    # Every other integer component, with its length in seconds
    seconds_per_unit = (
        ('secs', 1),
        ('mins_reversed', 60),
        ('mins', 60),
        ('hours', 60 * 60),
        ('hours_reversed', 60 * 60),
        ('days', 24 * 60 * 60),
    )
    total = 0
    for group_name, factor in seconds_per_unit:
        if mobj.group(group_name):
            total += int(mobj.group(group_name)) * factor
    if mobj.group('ms'):
        # The 'ms' group is the fractional part including its dot
        total += float(mobj.group('ms'))
    return total
91d7d0b3
JMF
1448
1449
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the real extension of filename.

    If expected_real_ext is given and the real extension is a different
    one, ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
1456
1457
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the real extension of filename by ext.

    If expected_real_ext is given and the real extension is a different
    one, ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
1463
1464
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if the binary could not be started. """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1473
1474
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # stderr is merged into stdout, so the version is found on either
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = proc.communicate()
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
1488
1489
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version from the output of a program.

    version_re must contain one group that captures the version; by
    default the word following 'version' is used. Returns unrecognized
    if the expression is not found in output. """
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
1499
1500
class PagedList(object):
    """ Base class of paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1505
9c44d242
PH
1506
class OnDemandPagedList(PagedList):
    """ Paged list that fetches only as many pages as a slice needs.

    pagefunc(pagenum) returns an iterable with the entries of that page;
    pagesize is the number of entries on a full page. """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries start (inclusive) to end (exclusive) as a
        list; end=None means up to the last entry """
        pagesize = self._pagesize
        entries = []
        for pagenum in itertools.count(start // pagesize):
            page_first = pagenum * pagesize
            next_page_first = page_first + pagesize
            if start >= next_page_first:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of the first wanted entry within this page
            if page_first <= start < next_page_first:
                startv = start % pagesize
            else:
                startv = 0

            # Offset just behind the last wanted entry, if the slice ends
            # within this page
            if end is not None and page_first <= end <= next_page_first:
                endv = ((end - 1) % pagesize) + 1
            else:
                endv = None

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            entries.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == next_page_first:
                break
        return entries
81c2f20b
PH
1548
1549
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list whose number of pages is known in advance.

    pagefunc(pagenum) returns an iterable with the entries of that page. """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries start (inclusive) to end (exclusive) as a
        list; end=None means up to the last page """
        pagesize = self._pagesize
        entries = []
        first_page = start // pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // pagesize + 1
            remaining = end - start
        # Entries to drop at the beginning of the first page
        to_skip = start % pagesize
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the slice
                    entries.extend(page[:remaining])
                    break
                remaining -= len(page)
            entries.extend(page)
        return entries
1577
1578
def uppercase_escape(s):
    """ Decode all \\UXXXXXXXX escape sequences (eight hex digits) in s """
    decode = codecs.getdecoder('unicode_escape')

    def replace_escape(mobj):
        # The decoder returns (text, consumed length)
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', replace_escape, s)
0fe2ff78
YCH
1585
1586
def lowercase_escape(s):
    """ Decode all \\uXXXX escape sequences (four hex digits) in s """
    decode = codecs.getdecoder('unicode_escape')

    def replace_escape(mobj):
        # The decoder returns (text, consumed length)
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', replace_escape, s)
b53466e1 1593
d05cfe06
S
1594
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2, text is encoded as UTF-8 before being quoted
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    # The second argument lists the characters that are left untouched
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1600
1601
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Only path, params, query and fragment are escaped; the remaining
    # components are kept as they are
    escaped_parts = parts._replace(
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    )
    return escaped_parts.geturl()
1611
# struct_pack/struct_unpack accept the format as text on every Python version
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(fmt, *args)

    def struct_unpack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(fmt, *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1628
1629
def read_batch_urls(batch_fd):
    """ Read the URLs of a batch file, one per line.

    batch_fd is an iterable of lines (text or UTF-8 bytes) and is closed
    afterwards. Lines are stripped; empty lines and lines starting with
    '#', ';' or ']' are skipped. """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A byte order mark may show up in two forms: as U+FEFF if the
        # line was decoded as UTF-8, or as the three characters below if
        # the BOM bytes were decoded one by one
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1644
1645
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments like urlencode and return the result as
    ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1648
1649
16392824
S
def encode_dict(d, encoding='utf-8'):
    """ Return a copy of d with all keys and values encoded to bytes """
    encoded = {}
    for key, value in d.items():
        encoded_key = key.encode(encoding)
        encoded[encoded_key] = value.encode(encoding)
    return encoded
1652
1653
0990305d
PH
# etree_iter(node) iterates over an element tree: Element.iter where it
# exists, a findall-based substitute otherwise
etree_iter = getattr(xml.etree.ElementTree.Element, 'iter', None)
if etree_iter is None:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')
1658
1659
bcf89ce6
PH
def parse_xml(s):
    """ Parse the XML document in the text s and return its root element.

    Doctype declarations are ignored. """
    class _DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=_DoctypeIgnoringBuilder())
    # The parser argument is only passed on Python 2.7 and newer
    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            if node.text is not None and not isinstance(node.text, compat_str):
                node.text = node.text.decode('utf-8')
    return tree
e68301af
PH
1675
1676
a1a530b0
PH
# Age limit (in years) assigned to each US rating; used by
# parse_age_limit for values that are not plain numbers
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
1684
1685
146c80e2
S
def parse_age_limit(s):
    """ Parse an age limit given as a one- or two-digit number with an
    optional trailing '+', or as a key of US_RATINGS.

    Returns the age as int, or None if s is None or not recognized. """
    if s is None:
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1691
1692
def strip_jsonp(code):
    """ Remove the callback wrapper of a JSONP response (including an
    optional trailing semicolon and // comments) and return the payload.
    Code that does not look like JSONP is returned unchanged. """
    jsonp_re = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_re, r'\1', code)
478c2c61
PH
1696
1697
e05f6939
PH
def js_to_json(code):
    """ Convert a JavaScript object literal to JSON.

    Single-quoted strings and bare identifiers are turned into
    double-quoted strings (true, false and null are kept), and commas
    directly in front of a closing bracket or brace are removed. """
    # How escape sequences and quotes of a single-quoted string have to
    # look in a double-quoted one
    single_quoted_map = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith('"'):
            # Escaped single quotes need no escaping in JSON
            token = re.sub(r"\\'", "'", token[1:-1])
        elif token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: single_quoted_map[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    # Matches double-quoted strings, single-quoted strings and identifiers
    quoted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Drop trailing commas
    return re.sub(r',(\s*[\]}])', lambda m: m.group(1), quoted)
1721
1722
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values.

    The returned function maps a quality id to its index in quality_ids,
    and unknown ids to -1. """
    def q(qid):
        return quality_ids.index(qid) if qid in quality_ids else -1
    return q
1731
acd69589
PH
1732
# Default output filename template, filled in with %-style named fields
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1734
a020a0dc
PH
1735
def limit_length(s, length):
    """ Add ellipses to overly long strings, so that the result is not
    longer than length characters. None is passed through. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1744
1745
def version_tuple(v):
    """ Split a version string at dots and dashes into a tuple of ints """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
1748
1749
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit.

    If version is empty or one of the versions cannot be parsed, the
    version counts as outdated only if assume_new is false. """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
732ea2f0
PH
1757
1758
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when this module was loaded by a zipimporter, or when sys
    # has a 'frozen' attribute
    loaded_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return loaded_from_zip or hasattr(sys, 'frozen')
7d4111ed
PH
1764
1765
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    quoted_args = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted_args)
2ccd1b10
PH
1769
1770
c460bdd5
PH
def mimetype2ext(mt):
    """ Map a MIME type to a file extension.

    The extension is the subtype of the MIME type, apart from a few
    special cases. Parameters such as '; charset=utf-8' are ignored.
    Returns None if mt is None (e.g. a missing Content-Type header). """
    if mt is None:
        return None

    # Strip the parameters first: they may contain slashes themselves
    essence = mt.split(';')[0].strip()
    _, _, res = essence.rpartition('/')

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
        'ttml+xml': 'ttml',
    }.get(res, res)
1779
1780
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """ Detect the file extension from the headers of a response.

    The filename of a Content-Disposition attachment is preferred; the
    Content-Type header is used otherwise. """
    try:
        url_handle.headers

        def getheader(name):
            return url_handle.headers[name]
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader

    disposition = getheader('Content-Disposition')
    if disposition:
        mobj = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1797
1798
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Build a base64 data: URI of the given MIME type from the bytes data """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
1801
1802
05900629
PH
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked if no limit is set (age_limit is None) or if the
    # content is available for everyone (content_limit is None)
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1811
1812
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Returns a match object if the decoded text starts with optional
    whitespace and a '<', None otherwise. """

    # The UTF-32 marks must be tested before the UTF-16 ones, because the
    # UTF-32-LE mark starts with the UTF-16-LE mark
    BOMS = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    # Without a byte order mark, UTF-8 is assumed
    encoding, skip = 'utf-8', 0
    for bom, bom_encoding in BOMS:
        if first_bytes.startswith(bom):
            encoding, skip = bom_encoding, len(bom)
            break
    text = first_bytes[skip:].decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
1831
1832
def determine_protocol(info_dict):
    """ Return the protocol of a format: its 'protocol' entry if there is
    one, otherwise a guess based on the 'url' entry """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Streaming protocols are recognized by the start of the URL ...
    for scheme_prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(scheme_prefix):
            return scheme_prefix

    # ... manifest formats by the extension
    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1853
1854
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # Width of the longest value of every column
    widths = [
        max(len(compat_str(value)) for value in column)
        for column in zip(*rows)]
    # All columns but the last are left-aligned and padded to their width
    # plus one; the last one is printed as it is
    padded_cells = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_cells) + '%s'
    return '\n'.join([row_format % tuple(row) for row in rows])
347de493
PH
1861
1862
def _match_one(filter_part, dct):
    """Evaluate a single filter clause against the dictionary dct.

    Two clause forms are recognised:

    * ``key OP value`` with OP one of ``< <= > >= = !=``. A ``?`` directly
      after the operator makes the clause pass when the key is missing/None.
      Numeric values may carry a size suffix; string values are only allowed
      with ``=`` and ``!=``.
    * ``key`` / ``!key``, testing whether the key is present (not None) or
      absent (None).

    Returns the comparison result; for a missing key in the first form the
    matched ``?`` text (truthy) or None is returned. Raises ValueError for
    clauses that cannot be parsed.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    comparison_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    found = comparison_rex.search(filter_part)
    if found:
        op_symbol = found.group('op')
        compare = COMPARISON_OPERATORS[op_symbol]
        text_operand = found.group('strval')
        if text_operand is not None:
            # Ordering operators make no sense for free-form strings.
            if op_symbol not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % op_symbol)
            expected = text_operand
        else:
            number_text = found.group('intval')
            try:
                expected = int(number_text)
            except ValueError:
                # Not a plain integer: try it as a file size, first as
                # written, then with a trailing 'B' appended.
                expected = parse_filesize(number_text)
                if expected is None:
                    expected = parse_filesize(number_text + 'B')
                if expected is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            number_text, filter_part))
        actual = dct.get(found.group('key'))
        if actual is None:
            # Missing value: pass only if the clause carried the '?' marker.
            return found.group('none_inclusive')
        return compare(actual, expected)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    presence_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    found = presence_rex.search(filter_part)
    if found:
        check = UNARY_OPERATORS[found.group('op')]
        return check(dct.get(found.group('key')))

    raise ValueError('Invalid filter part %r' % filter_part)
1920
1921
def match_str(filter_str, dct):
    """Check dct against a filter string of '&'-separated clauses.

    Returns True when every clause passes, False as soon as one does not.
    """
    for clause in filter_str.split('&'):
        if not _match_one(clause, dct):
            return False
    return True
1927
1928
def match_filter_func(filter_str):
    """Build a predicate for info dicts from the filter string filter_str.

    The returned function yields None when the info dict passes the filter,
    otherwise a human-readable message explaining that it is being skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Prefer the title for the message, then the id, then a stub.
            label = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (label, filter_str)
        return None
    return _match_func
91410c9b
PH
1937
1938
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to seconds (float).

    Accepts an offset such as '12', '12.5' or '12.5s', or a clock value of
    the form 'H:MM:SS' with an optional fractional part. An empty or None
    expression yields 0.0; an unrecognised one yields None.
    """
    if not time_expr:
        return 0.0

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if clock_match:
        hours, minutes, seconds = clock_match.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds)

    return None
1950
1951
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode 'HH:MM:SS,mmm'.

    The value is rounded to the nearest millisecond once, up front, and then
    split with integer arithmetic. Truncating the float fraction instead
    loses a millisecond for values that are not exactly representable in
    binary (1.001 would render as ',000', 2.3 as ',299').
    """
    total_ms = int(round(seconds * 1000))
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    # Hours are deliberately not wrapped; long durations just use more digits.
    hours, minutes = divmod(total_minutes, 60)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
bf6427d2
YCH
1954
1955
def dfxp2srt(dfxp_data):
    """Convert a DFXP/TTML subtitle document (text) into SRT text.

    Paragraph elements are looked up in the 'ttml' namespace, then the
    'ttaf1' namespace, then without a namespace. Each paragraph becomes one
    numbered SRT cue whose timing comes from its 'begin' attribute and either
    'end' or 'dur'.

    Raises ValueError when the document contains no paragraph elements.
    A paragraph lacking 'begin', or lacking both 'end' and 'dur', raises
    KeyError.
    """
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
    })
    # Resolve the tag names once instead of on every child visited.
    br_tags = (_x('ttml:br'), _x('ttaf1:br'), 'br')
    span_tags = (_x('ttml:span'), _x('ttaf1:span'), 'span')

    def parse_node(node):
        """Flatten node into text: line breaks become newlines, spans are
        unwrapped recursively, any other element is kept as serialized XML."""
        str_or_empty = functools.partial(str_or_none, default='')

        out = str_or_empty(node.text)

        for child in node:
            if child.tag in br_tags:
                out += '\n' + str_or_empty(child.tail)
            elif child.tag in span_tags:
                # The text following the closing span tag lives in the span's
                # .tail and is not part of the recursive result, so it has to
                # be appended here or it is lost.
                out += str_or_empty(parse_node(child)) + str_or_empty(child.tail)
            else:
                # tostring() returns bytes on Python 3; decode it so the
                # markup is not turned into a bytes repr when stringified.
                # The serialized form already includes the element's tail.
                markup = xml.etree.ElementTree.tostring(child).decode('utf-8')
                out += str_or_empty(markup)

        return out

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib['begin'])
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        if not end_time:
            # No usable 'end': derive it from the cue duration instead.
            end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
1996
1997
66e289ba
S
def cli_option(params, command_option, param):
    """Return [command_option, value] for params[param], or [] if unset.

    A key that is missing or mapped to None counts as unset.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
2001
2002
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Translate the boolean params[param] into command line arguments.

    Without a separator the result is [command_option, text]; with one it is
    the single argument command_option + separator + text, where text is
    true_value or false_value. The parameter must be present and a bool
    (enforced with an assertion).
    """
    flag = params.get(param)
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
2009
2010
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value.

    Any other value, including a missing key, yields an empty list.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
2014
2015
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra arguments stored under params[param].

    When the parameter is missing or None, default is returned. A list
    default is returned as a fresh copy, so callers that extend the result
    cannot corrupt the shared default argument (or the caller's own default
    list) for later calls. Non-list defaults are passed through untouched.
    A configured value must be a list (enforced with an assertion) and is
    returned as is.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
2022
2023
39672624
YCH
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # Two-letter code -> three-letter code.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up, so anything
        after them is ignored. Returns None for unknown codes.
        """
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns None when no entry maps to the given three-letter code.
        """
        # Reverse lookup by scanning the forward table.
        return next(
            (short_name
             for short_name, long_name in cls._lang_map.items()
             if long_name == code),
            None)
2224
2225
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # Two-letter country code -> English country name.
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the
        corresponding full name.

        The lookup is case-insensitive; unknown codes yield None.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
2484
2485
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets a single request pick its own proxy.

    A request carrying a 'Ytdl-request-proxy' header is sent through the
    proxy named there; the header is removed before the request goes on.
    The special proxy value '__noproxy__' means "connect directly".
    """

    def __init__(self, proxies=None):
        # Install fallback http_open/https_open handlers that route through
        # proxy_open() with the '__noproxy__' marker. This is done before the
        # base initializer runs, so whatever that initializer sets up for the
        # configured proxies is applied afterwards.
        for scheme in ('http', 'https'):
            # scheme and the bound method are frozen as lambda defaults so
            # each handler keeps its own values.
            fallback = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, fallback)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        header_name = 'Ytdl-request-proxy'
        override = req.headers.get(header_name)
        if override is not None:
            # The per-request choice wins; drop the header so it is no
            # longer part of the request's headers.
            del req.headers[header_name]
            proxy = override

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)