]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[videofy.me] fix info extraction
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
912b38b4 7import calendar
676eb3f2 8import codecs
62e609ab 9import contextlib
e3946f98 10import ctypes
c496ca96
PH
11import datetime
12import email.utils
f45c185f 13import errno
be4a824d 14import functools
d77c3dfd 15import gzip
b7ab0590 16import itertools
03f9daab 17import io
f4bfd65f 18import json
d77c3dfd 19import locale
02dbf93f 20import math
347de493 21import operator
d77c3dfd 22import os
4eb7f1d1 23import pipes
c496ca96 24import platform
d77c3dfd 25import re
13ebea79 26import ssl
c496ca96 27import socket
b53466e1 28import struct
1c088fa8 29import subprocess
d77c3dfd 30import sys
181c8655 31import tempfile
01951dda 32import traceback
bcf89ce6 33import xml.etree.ElementTree
d77c3dfd 34import zlib
d77c3dfd 35
8c25f81b 36from .compat import (
8f9312c3 37 compat_basestring,
8c25f81b 38 compat_chr,
36e6f62c 39 compat_etree_fromstring,
8c25f81b 40 compat_html_entities,
be4a824d 41 compat_http_client,
c86b6142 42 compat_kwargs,
8c25f81b 43 compat_parse_qs,
be4a824d 44 compat_socket_create_connection,
8c25f81b
PH
45 compat_str,
46 compat_urllib_error,
47 compat_urllib_parse,
48 compat_urllib_parse_urlparse,
49 compat_urllib_request,
50 compat_urlparse,
7d4111ed 51 shlex_quote,
8c25f81b 52)
4644ac55
S
53
54
468e2e92
FV
# The re module does not expose the compiled-pattern class under a stable
# public name, so derive it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP headers; YoutubeDLHandler.http_request adds each of these to a
# request that does not already carry the header.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Sentinel meaning "the caller supplied no default" (None is a valid default).
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]
72
73
def preferredencoding():
    """Return the preferred text encoding for this system.

    Uses locale.getpreferredencoding() and falls back to 'UTF-8' when the
    lookup fails or the reported codec cannot be used to encode text.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here.
        'TEST'.encode(encoding)
        return encoding
    except Exception:
        return 'UTF-8'
d77c3dfd 87
f4bfd65f 88
def write_json_file(obj, fn):
    """Encode obj as JSON and write it to fn, atomically if possible.

    The JSON is written to a temporary file created alongside fn, which is
    then renamed over fn. If anything fails, the temporary file is removed
    (best effort) and the exception is re-raised.
    """
    fn = encodeFilename(fn)
    on_python2 = sys.version_info < (3, 0)

    if on_python2 and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile fails on non-ASCII names unless it is given
        # unicode objects, hence the decoding wrappers.
        def path_basename(f):
            return os.path.basename(fn).decode(fs_encoding)

        def path_dirname(f):
            return os.path.dirname(fn).decode(fs_encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    tmp_options = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # json.dump expects a byte stream on Python 2.x and a character stream
    # on Python 3.x.
    if on_python2:
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # On Windows os.rename raises WindowsError or FileExistsError
            # when the target exists, so remove it first.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
141
142
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath[@key] or xpath[@key='val'].

        Returns the matching element, or None when nothing matches.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val:
            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath that has attribute key (== val if given)."""
        # Python 2.6 quirk: with a unicode xpath, .//node does not match
        # a node that is a direct child of the context node.
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        for candidate in node.findall(xpath):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
164
d7e66d39
JMF
165# On python2.6 the xml.etree.ElementTree.Element methods don't support
166# the namespace parameter
5f6a1245
JW
167
168
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{uri}tag' form.

    ns_map maps each namespace prefix to its URI; steps without a prefix
    are kept unchanged.
    """
    def _expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(step) for step in path.split('/'))
179
d77c3dfd 180
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matching xpath.

    xpath is either a single XPath string or an iterable of XPath strings
    that are tried in order until one matches.

    When nothing matches: return default if one was supplied, otherwise
    raise ExtractorError if fatal is set, otherwise return None. name is
    used in the error message in place of xpath when given.
    """
    def _find_xpath(xpath):
        if sys.version_info < (2, 7):  # Crazy 2.6
            xpath = xpath.encode('ascii')
        return node.find(xpath)

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so that an empty iterable of xpaths falls
        # through to the default/fatal handling instead of leaving n unbound.
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
204
205
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element is handled by xpath_element(). When the element
    exists but has no text: return default if one was supplied, otherwise
    raise ExtractorError if fatal is set, otherwise return None.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
219
220
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: return default if one was supplied,
    otherwise raise ExtractorError if fatal is set, otherwise return None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
232
233
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id in the HTML document."""
    return get_element_by_attribute('id', id, html)
237
12ea2f30 238
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag in html carrying attribute=value.

    The content is HTML-unescaped before being returned; None is returned
    when no such tag is found.
    """
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s*>
         (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    found = re.search(pattern, html)
    if not found:
        return None

    content = found.group('content')
    # Content that starts with a quote character has its first and last
    # characters dropped.
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 260
9e6dd238
FV
261
def clean_html(html):
    """Clean an HTML snippet into a readable string."""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Literal newlines carry no meaning in HTML; only <br> and paragraph
    # boundaries become line breaks.
    text = html.replace('\n', ' ')
    substitutions = (
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip all remaining tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Replace HTML entities, then trim surrounding whitespace
    return unescapeHTML(text).strip()
9e6dd238
FV
277
278
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    The name '-' stands for standard output. If opening fails for a reason
    other than a permission error, the name is passed through
    sanitize_path() and opened again; if sanitizing changes nothing, the
    original error is re-raised. An error from the second attempt
    propagates to the caller.

    Returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise

        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
309
310
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a system timestamp.

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 318
5f6a1245 319
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitize a string so it can be used as part of a filename.

    If restricted is set, a stricter subset of characters is allowed.
    Set is_id when s is an ID that should be kept as intact as possible:
    only the per-character replacement is applied to it.
    """
    def _clean_char(ch):
        code = ord(ch)
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (ch in '!&\'()[]{}$;`^,#' or ch.isspace() or code > 127):
            return '_'
        return ch

    # Timestamps such as 12:34:56 keep their digits; only the colons change
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    cleaned = ''.join(_clean_char(ch) for ch in s)
    if is_id:
        return cleaned

    # Collapse runs of underscores and trim them from both ends
    cleaned = re.sub(r'_{2,}', '_', cleaned).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    if cleaned.startswith('-'):
        cleaned = '_' + cleaned[1:]
    cleaned = cleaned.lstrip('.')
    return cleaned or '_'
d77c3dfd 356
5f6a1245 357
a2aaf4db
S
def sanitize_path(s):
    """Sanitize and normalize a path on Windows.

    On any other platform s is returned unchanged. On Windows the path is
    normalized, and in every component other than '.' and '..' each
    forbidden character (and a trailing whitespace character or dot) is
    replaced with '#'. A drive or UNC prefix is preserved as-is.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Raw string: the pattern text is unchanged, but it no longer relies on
    # the invalid '\s' escape being passed through by a non-raw literal.
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
374
375
def orderedSet(iterable):
    """Return a list of the items of iterable with duplicates removed.

    The first occurrence of each item is kept, in its original order. Items
    are compared with ==, so they do not need to be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 383
912b38b4 384
4e408e47
PH
def _htmlentity_transform(entity):
    """Transform an HTML entity (given without '&' and ';') to a character.

    Named entities are looked up in compat_html_entities.name2codepoint;
    numeric entities are accepted in decimal ('#65') and hexadecimal
    ('#x41') form. Anything that cannot be converted is returned in its
    literal '&...;' representation.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # The number is not a valid code point (e.g. '&#2013266066;');
            # fall through and keep the entity verbatim instead of crashing.
            pass

    # Unknown or unconvertible entity, return its literal representation
    return ('&%s;' % entity)
4e408e47
PH
403
404
def unescapeHTML(s):
    """Replace every '&...;' entity in s using _htmlentity_transform().

    None is passed through unchanged; any other input must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', _replace_entity, s)
d77c3dfd 412
8bf48f23 413
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding used for file names and subprocess arguments.

    On Windows (major version >= 5) this is the locale encoding, see
    http://stackoverflow.com/a/9951851/35070; elsewhere it is the
    filesystem encoding, with 'utf-8' as the fallback when none is reported.
    """
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        chosen = preferredencoding()
    else:
        chosen = sys.getfilesystemencoding()
    return 'utf-8' if chosen is None else chosen
424
425
def encodeFilename(s, for_subprocess=False):
    """Encode the file name s for use with the OS file APIs.

    @param s               The name of the file (must be a compat_str)
    @param for_subprocess  Set when the name is passed to a subprocess
    """
    assert type(s) == compat_str

    # Python 3 has a Unicode API. On Python 2, Windows 2000 and up also has
    # one, usable for everything except subprocess calls.
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    has_unicode_api = sys.version_info >= (3, 0) or (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    if has_unicode_api:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
444
445
def decodeFilename(b, for_subprocess=False):
    """Decode a file name received as bytes on Python 2.

    On Python 3, or when b is not a bytes object, b is returned unchanged.
    Otherwise it is decoded with get_subprocess_encoding(), ignoring
    undecodable bytes.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 455
f07b74fc
PH
456
def encodeArgument(s):
    """Encode a command-line argument for a subprocess call.

    Byte strings from legacy callers are first decoded as ASCII.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)

    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
464
465
aa49acd1
S
def decodeArgument(b):
    """Decode a subprocess argument; shorthand for decodeFilename(b, True)."""
    return decodeFilename(b, for_subprocess=True)
468
469
8271226a
PH
def decodeOption(optval):
    """Return an option value as text.

    None is passed through; bytes are decoded with preferredencoding().
    The result must be a compat_str.
    """
    if optval is None:
        return None

    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 478
5f6a1245 479
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The widest unit needed is used: durations of one hour or more include
    hours, durations of one minute or more include minutes, and anything
    shorter is printed as a bare number of seconds.
    """
    # Inclusive bounds: exactly 3600 s is '1:00:00' (not '60:00') and
    # exactly 60 s is '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
487
a0ddb8a2 488
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler suited to the running Python version.

    Certificate verification is disabled when params['nocheckcertificate']
    is true. Extra keyword arguments are forwarded to the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 512
732ea2f0 513
08f2a92c
JMF
def bug_reports_message():
    """Return the suffix that asks the user to report an issue.

    The update hint depends on whether ytdl_is_updateable() is true.
    """
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'

    return ''.join([
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ])
523
524
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """Build the error message and remember the surrounding context.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is appended to the message; video_id, if given, is
        prepended to it.
        """
        # Errors raised while handling one of these exceptions are always
        # treated as expected.
        expected_types = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_types:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is None:
            return None
        formatted = traceback.format_tb(self.traceback)
        return ''.join(formatted)
01951dda 552
1c256f70 553
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Extractor error with an 'Unsupported URL' message; always marked as expected."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
559
560
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match."""
564
565
class DownloadError(Exception):
    """Download Error exception.

    May be thrown by FileDownloader objects that are not configured to
    continue on errors. It carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """Store msg and the optional exc_info.

        exc_info, if given, is the original exception that caused the
        trouble (as returned by sys.exc_info()).
        """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
578
579
class SameFileError(Exception):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
587
588
class PostProcessingError(Exception):
    """Post Processing exception.

    May be raised by a PostProcessor's .run() method to indicate an error
    in the postprocessing task.
    """

    def __init__(self, msg):
        # Keep the message available as an attribute for callers
        self.msg = msg
d77c3dfd 598
5f6a1245 599
class MaxDownloadsReached(Exception):
    """The --max-downloads limit has been reached."""
d77c3dfd
FV
603
604
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available for
    that video.
    """
d77c3dfd
FV
612
613
class ContentTooShortError(Exception):
    """Content Too Short exception.

    May be raised by FileDownloader objects when a file they download is
    too small for what the server announced first, indicating the
    connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        """Record both sizes and build a readable message.

        downloaded and expected are both in bytes.
        """
        # Initialise the base class with a message so that str(error) is
        # meaningful rather than a bare tuple of the two numbers.
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 626
5f6a1245 627
c5a59d93 628def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
629 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
630 # expected HTTP responses to meet HTTP/1.0 or later (see also
631 # https://github.com/rg3/youtube-dl/issues/6727)
632 if sys.version_info < (3, 0):
5a1a2e94 633 kwargs[b'strict'] = True
be4a824d
PH
634 hc = http_class(*args, **kwargs)
635 source_address = ydl_handler._params.get('source_address')
636 if source_address is not None:
637 sa = (source_address, 0)
638 if hasattr(hc, 'source_address'): # Python 2.7+
639 hc.source_address = sa
640 else: # Python 2.6
641 def _hc_connect(self, *args, **kwargs):
642 sock = compat_socket_create_connection(
643 (self.host, self.port), self.timeout, sa)
644 if is_https:
d7932313
PH
645 self.sock = ssl.wrap_socket(
646 sock, self.key_file, self.cert_file,
647 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
648 else:
649 self.sock = sock
650 hc.connect = functools.partial(_hc_connect, hc)
651
652 return hc
653
654
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed with an OpenerDirector, this class adds the standard
    headers to every HTTP request and transparently decodes gzipped and
    deflated responses. To avoid compression for a particular request, the
    calling code only has to include the HTTP header
    "Youtubedl-No-Compression"; it is removed before the real request is
    made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        connection_factory = functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream before a zlib-wrapped one."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying code, whether or not the class accepts it."""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        response = addinfourl(stream, headers, url)
        response.code = code
        return response

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)

        # Substitute URL if any change after escaping
        if original_url != escaped_url:
            if req.get_method() == 'HEAD':
                req_type = HEADRequest
            else:
                req_type = compat_urllib_request.Request
            replacement = req_type(
                escaped_url, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
            replacement.timeout = req.timeout
            req = replacement

        for header, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if header.capitalize() not in req.headers:
                req.add_header(header, value)

        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp

        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            try:
                uncompressed = io.BytesIO(
                    gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if no attempt
                # succeeds, report the original failure.
                for trim in range(1, 1024):
                    try:
                        uncompressed = io.BytesIO(
                            gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb').read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg

        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg

        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic is pre- and post-processed exactly like HTTP traffic
    https_request = http_request
    https_response = http_response
bf50b038 778
5de90176 779
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens its connections through _create_http_connection()."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Forward whichever of these attributes the base handler set up:
        # _context (python > 2.6) and _check_hostname (python 3.x).
        extra = {}
        for option, attr in (('context', '_context'), ('check_hostname', '_check_hostname')):
            if hasattr(self, attr):
                extra[option] = getattr(self, attr)

        connection_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(connection_factory, req, **extra)
be4a824d
PH
795
796
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same handling to HTTP and HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is currently disabled (commented out).
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        parent = compat_urllib_request.HTTPCookieProcessor
        return parent.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
819
820
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given ISO 8601 date string.

    delimiter separates the date and time parts. timezone, if given, is a
    datetime.timedelta offset that is subtracted from the parsed time;
    otherwise a trailing 'Z' or '+HH:MM' / '-HHMM' style offset is read
    from the string (no offset means UTC). Fractional seconds are ignored.

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Drop fractional seconds
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone = datetime.timedelta()
        tz_match = re.search(
            r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if tz_match:
            # Cut the timezone designator off the end of the string
            date_str = date_str[:-len(tz_match.group(0))]
            if tz_match.group('sign'):
                direction = 1 if tz_match.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=direction * int(tz_match.group('hours')),
                    minutes=direction * int(tz_match.group('minutes')))

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_time = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc_time.timetuple())
    except ValueError:
        return None
912b38b4
PH
850
851
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD.

    date_str is tried against a list of known date formats; ambiguous
    numeric dates are read day-first unless day_first is false. As a last
    resort the string is parsed as an RFC 2822 date.

    Returns None when date_str is None or matches no known format.
    """
    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    if day_first:
        format_expressions.extend([
            '%d-%m-%Y',
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ])
    else:
        format_expressions.extend([
            '%m-%d-%Y',
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ])
    # Every format is tried; when several match, the last match wins.
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    # Only convert a real result: compat_str(None) would yield the string
    # 'None', which callers could mistake for a date.
    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 915
5f6a1245 916
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension of url.

    The text after the last dot of the part preceding any '?' is returned
    if it is purely alphanumeric; otherwise (or when url is None)
    default_ext is returned."""
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 925
5f6a1245 926
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its last extension replaced by '<sub_lang>.<sub_format>'"""
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
d4051a8e 929
5f6a1245 930
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    The bare words 'now', 'today' and 'yesterday' are accepted as well.
    Months and years are approximated as 30 and 365 days.  Any other string
    is parsed as YYYYMMDD, so strptime raises ValueError if it is not one."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: '\d' in a plain literal is an invalid escape sequence
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # timedelta has no month or year unit, so approximate them in days
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta keyword arguments are plural ('days', 'weeks')
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
5f6a1245
JW
958
959
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.
    Strings that are not exactly eight digits are returned unchanged"""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if mobj is None:
        return date_str
    return '-'.join(mobj.groups())
968
5f6a1245 969
bd558525
JMF
970class DateRange(object):
971 """Represents a time interval between two dates"""
5f6a1245 972
bd558525
JMF
973 def __init__(self, start=None, end=None):
974 """start and end must be strings in the format accepted by date"""
975 if start is not None:
976 self.start = date_from_str(start)
977 else:
978 self.start = datetime.datetime.min.date()
979 if end is not None:
980 self.end = date_from_str(end)
981 else:
982 self.end = datetime.datetime.max.date()
37254abc 983 if self.start > self.end:
bd558525 984 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 985
bd558525
JMF
986 @classmethod
987 def day(cls, day):
988 """Returns a range that only contains the given day"""
5f6a1245
JW
989 return cls(day, day)
990
bd558525
JMF
991 def __contains__(self, date):
992 """Check if the date is in the range"""
37254abc
JMF
993 if not isinstance(date, datetime.date):
994 date = date_from_str(date)
995 return self.start <= date <= self.end
5f6a1245 996
bd558525 997 def __str__(self):
5f6a1245 998 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
999
1000
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string is decoded with the preferred encoding first
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
1009
1010
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The special method is the WriteConsoleW function of kernel32, used only
    when out is file descriptor 1 or 2 and is attached to a real console.
    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 and -12 are the Win32 STD_OUTPUT_HANDLE / STD_ERROR_HANDLE values)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b"GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters WriteConsoleW actually wrote
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b"GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, non-character files, and handles for
        # which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters per call, stopping before the next
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
1084
1085
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr when out is None) and flush it.

    On win32 without an explicit encoding the console API is tried first.
    Otherwise s is encoded (dropping unencodable characters) for binary
    streams and for streams exposing a .buffer, and written as text to
    anything else."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    may_use_console = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if may_use_console and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        charset = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(charset, 'ignore'))
    else:
        out.write(s)
    out.flush()
1106
1107
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints"""
    if not bs:
        return []
    # Indexing yields 1-character strings on Python 2 and ints on Python 3
    if not isinstance(bs[0], int):
        return [ord(c) for c in bs]
    return list(bs)
1115
c257baff 1116
def intlist_to_bytes(xs):
    """Pack a sequence of ints into a byte string, one unsigned byte each"""
    return struct_pack('%dB' % len(xs), *xs) if xs else b''
c38b1e77
PH
1121
1122
c1c9a79c
PH
1123# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on win32, fcntl.flock elsewhere.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Structure passed as the last argument of LockFileEx / UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high parts of the byte count passed to both calls
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raises OSError if LockFileEx fails"""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file can pass the same
        # structure again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 is LOCKFILE_EXCLUSIVE_LOCK in the Win32 API
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release a lock taken by _lock_file; raises OSError on failure"""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Take an exclusive or shared flock on the open file f"""
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the flock held on f"""
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1186
1187
class locked_file(object):
    """File wrapper that holds a file lock while used as a context manager.

    The file is opened on construction, locked on entering the with block
    and unlocked and closed on leaving it."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Only plain reading gets a non-exclusive lock
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Close the file even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1217
1218
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1222
1223
def shell_quote(args):
    """Return the arguments in args as one shell-quoted, space-separated string.

    Byte string arguments are decoded with the filesystem encoding first.
    Quoting is done with shlex_quote, the same helper args_to_str uses,
    instead of the undocumented pipes.quote (the pipes module is deprecated
    and no longer exists in Python 3.13)."""
    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(get_filesystem_encoding())
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1233
1234
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is serialized as JSON and appended to url as a urlencoded
    '__youtubedl_smuggle' fragment. """

    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
9d4660ca
PH
1241
1242
def unsmuggle_url(smug_url, default=None):
    """Split a URL produced by smuggle_url into (url, data).

    URLs without smuggled data are returned unchanged together with
    default."""
    if '#__youtubedl_smuggle' in smug_url:
        plain_url, _, fragment = smug_url.rpartition('#')
        serialized = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return plain_url, json.loads(serialized)
    return smug_url, default
02dbf93f
PH
1250
1251
02dbf93f
PH
1252def format_bytes(bytes):
1253 if bytes is None:
28e614de 1254 return 'N/A'
02dbf93f
PH
1255 if type(bytes) is str:
1256 bytes = float(bytes)
1257 if bytes == 0.0:
1258 exponent = 0
1259 else:
1260 exponent = int(math.log(bytes, 1024.0))
28e614de 1261 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 1262 converted = float(bytes) / float(1024 ** exponent)
28e614de 1263 return '%.2f%s' % (converted, suffix)
f53c966a 1264
1c088fa8 1265
be64b5b0
PH
def parse_filesize(s):
    """Parse a size such as '5 MiB' or '1,24 KB' into a number of bytes.

    Returns None if s is None or does not start with a number followed by
    a known unit."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {'B': 1, 'b': 1}
    for power, prefix in enumerate('KMGTPEZY', 1):
        binary = 1024 ** power
        decimal = 1000 ** power
        _UNIT_TABLE[prefix + 'iB'] = binary
        _UNIT_TABLE[prefix + 'B'] = decimal
        _UNIT_TABLE[prefix.lower() + 'B'] = binary
        _UNIT_TABLE[prefix + 'b'] = decimal

    unit_alternatives = '|'.join(re.escape(u) for u in _UNIT_TABLE)
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % unit_alternatives
    mobj = re.match(pattern, s)
    if not mobj:
        return None

    # A comma is accepted as the decimal separator
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * _UNIT_TABLE[mobj.group('unit')])
be64b5b0
PH
1318
1319
caefb1de
PH
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name,
    or None if name is not in ENGLISH_MONTH_NAMES """

    try:
        position = ENGLISH_MONTH_NAMES.index(name)
    except ValueError:
        return None
    return position + 1
1327
1328
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations (the first three letters of the name), or None """

    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
18258362
JMF
1337
1338
def fix_xml_ampersands(xml_str):
    """Replace by '&amp;' every '&' in XML that does not already start one
    of the predefined entities or a numeric character reference"""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
1345
1346
def setproctitle(title):
    """Try to set the process name to title via prctl of libc.so.6.

    This is best effort: nothing happens if libc.so.6 cannot be loaded or
    does not provide prctl."""
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes ctypes allocate one extra
    # byte, so the name is NUL-terminated; a buffer of exactly
    # len(title_bytes) bytes is not
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1360
1361
def remove_start(s, start):
    """Return s without the prefix start, or s itself if it has no such prefix"""
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1366
1367
2b9faf55
PH
def remove_end(s, end):
    """Return s without the suffix end, or s itself if it has no such suffix"""
    # An empty suffix must be skipped: s[:-0] is s[:0], i.e. an empty string
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1372
1373
def url_basename(url):
    """Return the last component of the path of url, ignoring slashes at
    either end of the path"""
    url_path = compat_urlparse.urlparse(url).path
    components = url_path.strip('/').split('/')
    return components[-1]
aa94a6d3
PH
1377
1378
class HEADRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports HEAD"""

    def get_method(self):
        return "HEAD"
7217e148
PH
1382
1383
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default instead of raising.

    If get_attr is given, the attribute of that name is read from v first.
    The result is int(v) * invscale // scale.  default is returned when v
    is None, an empty string, or anything int() cannot convert."""
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # TypeError: unsupported object such as a list or dict
        # OverflowError: infinite float
        return default
9732d77e 1396
9572013d 1397
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted to compat_str, or default if v is None"""
    if v is None:
        return default
    return compat_str(v)
1400
9732d77e
PH
1401
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: commas, dots and plus signs
    are dropped before the conversion """
    if int_str is None:
        return None
    cleaned = re.sub(r'[,\.\+]', '', int_str)
    return int(cleaned)
608d11f5
PH
1408
1409
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float, returning default instead of raising.

    The result is float(v) * invscale / scale.  default is returned when v
    is None or anything float() cannot convert."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        # TypeError: unsupported object such as a list or dict
        return default
43f775e4
PH
1417
1418
def parse_duration(s):
    """Parse a textual duration into a number of seconds.

    The regular expression below accepts, case-insensitively and with an
    optional leading 'T' or 'PT':
      - a lone (possibly fractional) amount of minutes or hours,
      - hours followed by minutes, with ':' or unit words as separators,
      - [[[days] hours] minutes] seconds, with ':' or unit letters/words as
        separators and an optional fractional part on the seconds.

    Returns None if s is not a string or does not match."""
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None
    res = 0
    # The two "only" forms are complete on their own and may be fractional
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)
    # Otherwise add up whichever components the match captured
    if m.group('secs'):
        res += int(m.group('secs'))
    if m.group('mins_reversed'):
        res += int(m.group('mins_reversed')) * 60
    if m.group('mins'):
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
    if m.group('hours_reversed'):
        res += int(m.group('hours_reversed')) * 60 * 60
    if m.group('days'):
        res += int(m.group('days')) * 24 * 60 * 60
    # 'ms' holds the fractional part of the seconds including its leading
    # dot, so adding it turns the result into a float
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
91d7d0b3
JMF
1463
1464
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the extension of filename.

    If expected_real_ext is given and filename does not end with that
    extension, ext is appended to the whole filename instead."""
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
d70ad093
PH
1471
1472
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of filename by ext.

    If expected_real_ext is given and filename does not end with that
    extension, ext is appended to the whole filename instead."""
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        stem = name
    else:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
1478
1479
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if the binary cannot be started. """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1488
1489
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The version is extracted from the combined stdout and stderr output
    by detect_exe_version """
    try:
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = process.communicate()[0]
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
1503
1504
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return the first group of version_re found in output, or
    unrecognized if there is no match.  Without version_re, the word
    following 'version' is looked for."""
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    mobj = re.search(pattern, output)
    return mobj.group(1) if mobj else unrecognized
1514
1515
class PagedList(object):
    """Base class of the paged lists; subclasses provide getslice()"""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1520
9c44d242
PH
1521
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages one at a time until enough entries are
    collected or a page comes back short."""

    def __init__(self, pagefunc, pagesize):
        # pagefunc is called with a page number (starting at 0) and must
        # return an iterable of the entries of that page
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indexes start (inclusive) to end
        (exclusive, None meaning "until the last page") as a list."""
        res = []
        # Start with the page that contains the entry number start
        for pagenum in itertools.count(start // self._pagesize):
            # Indexes of the first entry of this page and of the next one
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of start inside this page (0 on all later pages)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset of end inside this page, or None if the slice does not
            # end on this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1563
1564
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known beforehand."""

    def __init__(self, pagefunc, pagecount, pagesize):
        # pagefunc is called with a page number (starting at 0) and must
        # return an iterable of the entries of that page
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indexes start (inclusive) to end
        (exclusive, None meaning "up to the last page") as a list."""
        res = []
        start_page = start // self._pagesize
        # One past the last page to fetch
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Number of entries to drop from the beginning of the first page
        skip_elems = start - start_page * self._pagesize
        # Number of entries still wanted, or None for "no limit"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                # Only the first fetched page is trimmed at the front
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the slice
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1592
1593
def uppercase_escape(s):
    """Decode every \\UXXXXXXXX escape sequence (8 hex digits) found in s"""
    decode_escape = codecs.getdecoder('unicode_escape')

    def _decode(mobj):
        return decode_escape(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _decode, s)
0fe2ff78
YCH
1600
1601
def lowercase_escape(s):
    """Decode every \\uXXXX escape sequence (4 hex digits) found in s"""
    decode_escape = codecs.getdecoder('unicode_escape')

    def _decode(mobj):
        return decode_escape(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _decode, s)
b53466e1 1608
d05cfe06
S
1609
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 text is encoded as UTF-8 before it is quoted
    is_py2_text = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if is_py2_text:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1615
1616
def escape_url(url):
    """Escape URL as suggested by RFC 3986
    (path, params, query and fragment are escaped, the rest is kept)"""
    parts = compat_urllib_parse_urlparse(url)
    escaped_parts = parts._replace(
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    )
    return escaped_parts.geturl()
1626
# Define struct_pack / struct_unpack so that they accept a text format
# specification.  This file uses unicode_literals, so the probe below passes
# a text string on Python 2.
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        """struct.pack that encodes a text spec to ASCII bytes first"""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        """struct.unpack that encodes a text spec to ASCII bytes first"""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # The probe succeeded, so the plain functions can be used directly
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1643
1644
def read_batch_urls(batch_fd):
    """Read the URLs of a batch file and close it.

    batch_fd is iterated line by line; byte lines are decoded as UTF-8.  A
    leading byte order mark and surrounding whitespace are removed, and
    empty lines as well as lines starting with '#', ';' or ']' are skipped.
    Returns the remaining lines as a list."""
    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        # '\ufeff' is what the BOM looks like after proper UTF-8 decoding;
        # '\xef\xbb\xbf' is what it looks like when the raw bytes were
        # decoded one by one
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1659
1660
def urlencode_postdata(*args, **kargs):
    """urlencode the arguments and return the result as ASCII bytes"""
    query = compat_urllib_parse.urlencode(*args, **kargs)
    return query.encode('ascii')
bcf89ce6
PH
1663
1664
16392824
S
def encode_dict(d, encoding='utf-8'):
    """Return a copy of d with every key and value encoded to bytes"""
    def _encode(text):
        return text.encode(encoding)

    return dict((_encode(key), _encode(value)) for key, value in d.items())
1667
1668
a1a530b0
PH
# Rating label -> number returned by parse_age_limit for that label
# (presumably the minimum age in years; NOTE(review): confirm the intended
# mapping of 'R' and 'NC')
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
1676
1677
146c80e2
S
def parse_age_limit(s):
    """Parse an age limit such as '18' or '16+', or a US_RATINGS label.
    Returns an int, or None if s is None or not recognized"""
    if s is None:
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1683
1684
fac55558 1685def strip_jsonp(code):
609a61e3
PH
1686 return re.sub(
1687 r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
478c2c61
PH
1688
1689
e05f6939
PH
def js_to_json(code):
    """Convert a JavaScript object literal to (hopefully) valid JSON text.

    Double- and single-quoted strings as well as bare identifiers are
    rewritten as double-quoted JSON strings (true, false and null are
    kept), and commas directly before a closing ] or } are removed."""
    def fix_kv(m):
        # m matches one quoted string or one bare identifier
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('"'):
            # \' is not a valid escape sequence in JSON
            v = re.sub(r"\\'", "'", v[1:-1])
        elif v.startswith("'"):
            v = v[1:-1]
            # Keep escaped backslashes, unescape single quotes and escape
            # double quotes, since the result is wrapped in double quotes
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        return '"%s"' % v

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Drop trailing commas, which JSON does not allow
    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
    return res
1713
1714
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values.
    The returned function maps an id to its index in quality_ids,
    or to -1 if the id is not listed """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
1723
acd69589
PH
1724
# Default output filename template, a %-format string with the named
# fields title, id and ext
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1726
a020a0dc
PH
1727
def limit_length(s, length):
    """ Add ellipses to overly long strings, so that the result is at most
    length characters long.  None is passed through """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1736
1737
def version_tuple(v):
    """Split a version string at '.' and '-' into a tuple of ints"""
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
1740
1741
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.
    A missing or unparseable version counts as outdated only if
    assume_new is false"""
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        outdated = not assume_new
    return outdated
732ea2f0
PH
1749
1750
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U, i.e. if this module
    was loaded by a zipimporter or sys has a 'frozen' attribute """
    from zipimport import zipimporter

    loaded_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return loaded_from_zip or hasattr(sys, 'frozen')
7d4111ed
PH
1756
1757
def args_to_str(args):
    """Get a short string representation for a subprocess command"""
    quoted = [shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
1761
1762
c460bdd5
PH
def mimetype2ext(mt):
    """Return the file extension for the MIME type mt.

    This is the subtype (the part after the last '/'), except for a few
    subtypes that are mapped to their customary extension.  Returns None
    if mt is None, e.g. because a response carried no Content-Type."""
    if mt is None:
        return None

    _, _, res = mt.rpartition('/')

    return {
        'x-ms-wmv': 'wmv',
        'x-mp4-fragmented': 'mp4',
        'ttml+xml': 'ttml',
    }.get(res, res)
1771
1772
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of an opened URL.

    The filename of an attachment Content-Disposition header is preferred;
    otherwise the extension is derived from the Content-Type."""
    try:
        url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        getheader = lambda h: url_handle.headers[h]

    disposition = getheader('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1789
1790
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI containing the bytes data"""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
1793
1794
05900629
PH
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked if no limit is set, or if the content is
    # available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1803
1804
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.
    The result is truthy if, after decoding, the first non-whitespace
    character is '<'. """

    # The UTF-32 marks must be tested before the UTF-16 ones, because the
    # little-endian UTF-32 mark starts with the UTF-16 one
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for mark, codec in BOMS:
        if first_bytes.startswith(mark):
            text = first_bytes[len(mark):].decode(codec, 'replace')
            break
    if text is None:
        # No byte order mark
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
1823
1824
def determine_protocol(info_dict):
    """Return the download protocol of info_dict.

    An explicit 'protocol' entry wins.  Otherwise the protocol is derived
    from the start of the URL (rtmp, mms, rtsp), from its extension (m3u8,
    f4m) or, failing that, taken from the URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for streaming_protocol in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(streaming_protocol):
            return streaming_protocol

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1845
1846
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values.
    Every column but the last is left-aligned and padded to the width of
    its longest cell plus one; rows are joined by newlines """
    table = [header_row] + data
    column_widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*table)]
    padded_specs = ['%-' + compat_str(width + 1) + 's' for width in column_widths[:-1]]
    row_format = ' '.join(padded_specs) + '%s'
    return '\n'.join(row_format % tuple(row) for row in table)
347de493
PH
1853
1854
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against the dictionary dct.

    Two forms are understood:
      - 'key OP value', where OP is one of < <= > >= = != (optionally
        followed by '?') and value is a number, possibly with a size
        suffix, or a plain alphanumeric string (only with = and !=),
      - 'key' or '!key', testing whether dct has a non-None value for key.

    The result is truthy if the filter passes.  Raises ValueError for
    malformed expressions."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('strval') is not None:
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('strval')
        else:
            # Plain integer first, then a size with unit, then a size whose
            # unit lacks the final 'B'
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        actual_value = dct.get(m.group('key'))
        if actual_value is None:
            # Without a value the filter passes only if the operator was
            # followed by '?'; note that this returns the matched text (or
            # None), not a bool
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: v is not None,
        '!': lambda v: v is None,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
1912
1913
def match_str(filter_str, dct):
    """Check ``dct`` against every '&'-separated clause of ``filter_str``.

    Returns True when all clauses pass, and False as soon as one does not
    (later clauses are then not evaluated).
    """
    for clause in filter_str.split('&'):
        if not _match_one(clause, dct):
            return False
    return True
1919
1920
def match_filter_func(filter_str):
    """Build a callback that applies ``filter_str`` to an info dict.

    The callback returns None when the dict passes the filter, and
    otherwise a message saying that the video is being skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Name the video by title, falling back to its id, then 'video'.
            fallback_name = info_dict.get('id', 'video')
            video_title = info_dict.get('title', fallback_name)
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
1929
1930
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression into a number of seconds.

    Accepted forms are a plain number of seconds with an optional trailing
    's' (e.g. '12', '1.5s') and a clock value 'H:MM:SS[.fraction]'.
    An empty or missing expression yields 0.0; any other text that matches
    neither form yields None.
    """
    if not time_expr:
        return 0.0

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
    if clock_match is None:
        return None
    hours, minutes, seconds = clock_match.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds)
1942
1943
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time offset in seconds as an SRT timecode 'HH:MM:SS,mmm'.

    The offset is rounded to whole milliseconds before being split into
    fields.  Splitting the float directly and truncating each field loses a
    millisecond to binary rounding error (5.84 s would render as ',839').
    """
    total_msec = int(round(seconds * 1000))
    total_secs, msec = divmod(total_msec, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
bf6427d2
YCH
1946
1947
def dfxp2srt(dfxp_data):
    """Convert a DFXP/TTML subtitle document (text) into SRT text.

    Paragraph elements are looked up in the 'ttml' and 'ttaf1' namespaces
    and, failing that, without a namespace.  Each paragraph becomes one SRT
    cue whose end is taken from its 'end' attribute or, when that is absent
    or zero, from 'begin' + 'dur'.

    A missing 'begin' is treated as offset 0 (the result of
    ``parse_dfxp_time_expr`` for an empty expression).  Cues whose timing
    cannot be determined (unparseable 'begin', or neither a usable 'end'
    nor a parseable 'dur') are skipped instead of aborting the whole
    conversion.  Emitted cues are numbered consecutively from 1.

    Raises ValueError when the document contains no paragraph elements.
    """
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
    })

    def parse_node(node):
        # Flatten a paragraph/span into text: <br> becomes a newline,
        # nested <span>s are flattened recursively, and any other child
        # element is kept as serialized markup.
        str_or_empty = functools.partial(str_or_none, default='')

        out = str_or_empty(node.text)

        for child in node:
            if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                out += '\n' + str_or_empty(child.tail)
            elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
                out += str_or_empty(parse_node(child))
            else:
                out += str_or_empty(xml.etree.ElementTree.tostring(child))

        return out

    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    for para in paras:
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        if begin_time is None:
            # Unparseable start time: this cue cannot be placed.
            continue
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        if not end_time:
            dur_expr = para.attrib.get('dur')
            duration = None if dur_expr is None else parse_dfxp_time_expr(dur_expr)
            if duration is None:
                # Neither a usable 'end' nor a parseable 'dur'.
                continue
            end_time = begin_time + duration
        out.append('%d\n%s --> %s\n%s\n\n' % (
            len(out) + 1,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
1988
1989
66e289ba
S
def cli_option(params, command_option, param):
    """Return ``[command_option, value]`` for ``params[param]``.

    An empty list is returned when the parameter is absent or None.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
1993
1994
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean parameter as command-line arguments.

    Returns ``[command_option, text]`` where ``text`` is ``true_value`` or
    ``false_value``; with a ``separator`` the two are joined into a single
    argument (e.g. '--flag=true').  When the parameter is absent or None an
    empty list is returned, matching ``cli_option`` and
    ``cli_valueless_option``.  Any other non-bool value fails the assertion.
    """
    value = params.get(param)
    if value is None:
        # Option not configured: emit nothing rather than fail.
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
2001
2002
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return ``[command_option]`` when ``params[param]`` equals ``expected_value``.

    Otherwise (including when the parameter is absent) return an empty list.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
2006
2007
def cli_configuration_args(params, param, default=None):
    """Return the extra argument list stored under ``params[param]``.

    When the parameter is absent or None, ``default`` is returned; if no
    default is given, a new empty list is returned on every call so that
    callers may modify the result without affecting later calls.  A
    configured value that is not a list fails the assertion.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return [] if default is None else default
    assert isinstance(ex_args, list)
    return ex_args
2014
2015
39672624
YCH
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Map an ISO 639-1 code to its ISO 639-2/T code.

        Only the first two characters of ``code`` are looked up; None is
        returned when they are not in the table.
        """
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Map an ISO 639-2/T code to its ISO 639-1 code, or None if unknown."""
        return next(
            (short_name for short_name, long_name in cls._lang_map.items()
             if long_name == code),
            None)
2216
2217
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Return the full country name for a two-letter country code.

        The lookup is case-insensitive (the code is upper-cased first);
        None is returned for codes that are not in the table.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
2476
2477
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets an individual request pick its proxy.

    A request carrying a 'Ytdl-request-proxy' header is sent through the
    proxy named there, and the header is removed before the request is
    passed on.  The special proxy value '__noproxy__' means the request is
    not handed to the base class's proxy handling.
    """

    def __init__(self, proxies=None):
        # Register default http/https openers that go through proxy_open
        # with the '__noproxy__' marker, then run the base initializer.
        for scheme in ('http', 'https'):
            opener = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, scheme + '_open', opener)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Open ``req`` via ``proxy``, honouring a per-request override.

        Returns None for the '__noproxy__' marker; otherwise returns the
        result of the base class's ``proxy_open``.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            # Internal routing header: strip it from the request.
            del req.headers['Ytdl-request-proxy']

        if proxy != '__noproxy__':
            return compat_urllib_request.ProxyHandler.proxy_open(
                self, req, proxy, type)
        return None  # No Proxy