]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[utils] Jython support: tolerate missing fcntl module
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
62e609ab 10import contextlib
e3946f98 11import ctypes
c496ca96
PH
12import datetime
13import email.utils
f45c185f 14import errno
be4a824d 15import functools
d77c3dfd 16import gzip
b7ab0590 17import itertools
03f9daab 18import io
f4bfd65f 19import json
d77c3dfd 20import locale
02dbf93f 21import math
347de493 22import operator
d77c3dfd 23import os
4eb7f1d1 24import pipes
c496ca96 25import platform
d77c3dfd 26import re
13ebea79 27import ssl
c496ca96 28import socket
b53466e1 29import struct
1c088fa8 30import subprocess
d77c3dfd 31import sys
181c8655 32import tempfile
01951dda 33import traceback
bcf89ce6 34import xml.etree.ElementTree
d77c3dfd 35import zlib
d77c3dfd 36
8c25f81b 37from .compat import (
8f9312c3 38 compat_basestring,
8c25f81b 39 compat_chr,
36e6f62c 40 compat_etree_fromstring,
8c25f81b 41 compat_html_entities,
be4a824d 42 compat_http_client,
c86b6142 43 compat_kwargs,
8c25f81b 44 compat_parse_qs,
be4a824d 45 compat_socket_create_connection,
8c25f81b
PH
46 compat_str,
47 compat_urllib_error,
48 compat_urllib_parse,
49 compat_urllib_parse_urlparse,
50 compat_urllib_request,
51 compat_urlparse,
7d4111ed 52 shlex_quote,
8c25f81b 53)
4644ac55
S
54
55
468e2e92
FV
# This is not clearly defined otherwise
# (the type is derived from an actual compiled pattern object)
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers; YoutubeDLHandler.http_request adds each of
# these to a request that does not already carry the header.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Unique sentinel used as a default argument value (see the xpath_* helpers),
# so that "no default supplied" can be told apart from an explicit None.
NO_DEFAULT = object()

# English month names, January first.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Known file extensions (audio/video containers and manifest formats).
# NOTE(review): consumers of this tuple are outside this chunk - confirm usage.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
88
7105440c 89
def preferredencoding():
    """Return the text encoding to use on this system.

    The value of locale.getpreferredencoding() is used when it names a
    codec that can actually encode text; otherwise 'UTF-8' is returned.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: a missing or unknown name raises here.
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
d77c3dfd 103
f4bfd65f 104
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn and
    then renamed over fn.  If anything fails, the temporary file is removed
    (best effort) and the original exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind; cleanup is best effort.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
157
158
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val:
            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        if val is None:
            # Only require the attribute to be present
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        for candidate in node.findall(xpath):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
180
d7e66d39
JMF
181# On python2.6 the xml.etree.ElementTree.Element methods don't support
182# the namespace parameter
5f6a1245
JW
183
184
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace-uri}tag' form.

    ns_map maps each prefix to its namespace URI.  Steps without a prefix
    are kept unchanged.
    """
    def _expand(step_parts):
        if len(step_parts) == 1:
            return step_parts[0]
        prefix, tag = step_parts
        return '{%s}%s' % (ns_map[prefix], tag)

    split_steps = [step.split(':') for step in path.split('/')]
    return '/'.join([_expand(parts) for parts in split_steps])
195
d77c3dfd 196
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node matching xpath.

    xpath is either a single xpath string or an iterable of xpath strings
    that are tried in order.  When nothing matches: return default if one
    was given, else raise ExtractorError if fatal is set, else return None.
    name, if given, replaces the xpath in the error message.
    """
    def _find_xpath(xpath):
        if sys.version_info < (2, 7):  # Crazy 2.6
            xpath = xpath.encode('ascii')
        return node.find(xpath)

    # Start from "not found" so that an empty iterable of xpaths is treated
    # like a miss instead of leaving n unbound.
    n = None
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
220
221
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element or missing text yields default when one was given,
    raises ExtractorError when fatal is set, and yields None otherwise.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        # Nothing found: hand back whatever xpath_element decided on
        return element

    if element.text is not None:
        return element.text

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
235
236
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first xpath match that carries it.

    When no such element exists: return default if one was given, raise
    ExtractorError if fatal is set, otherwise return None.
    """
    match = find_xpath_attr(node, xpath, key)
    if match is not None:
        return match.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
248
249
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id, or None"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
43e8fafd 253
12ea2f30 254
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag in html having attribute=value.

    Returns None when no such tag is found; HTML entities in the content
    are unescaped.
    """
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    found = re.search(pattern, html)
    if not found:
        return None

    content = found.group('content')
    # Content that starts with a quote character loses its first and last char
    if content.startswith('"') or content.startswith("'"):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 276
9e6dd238
FV
277
def clean_html(html):
    """Turn an HTML snippet into readable plain text (None stays None)"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    text = html.replace('\n', ' ')
    substitutions = (
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
293
294
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' selects standard output (its binary buffer when available).  When
    opening fails for a reason other than EACCES, the name is passed through
    sanitize_path() and, if that changed it, opened again; an error from the
    second attempt propagates to the caller.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            return (open(encodeFilename(filename), open_mode), filename)

        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
        return (out, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
325
326
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 334
5f6a1245 335
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        # IDs are kept as close to the input as possible
        return result

    while '__' in result:
        result = result.replace('__', '_')
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result if result else '_'
d77c3dfd 372
5f6a1245 373
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform s is returned unchanged.  On Windows the path is
    normalized and, in every component except '.' and '..', characters that
    are forbidden in file names (and a trailing space or dot) are replaced
    by '#'.  A leading drive or UNC prefix is preserved.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Raw string: same pattern as before, without relying on the invalid
    # '\s' escape inside a normal string literal.
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
390
391
67dda517
S
392# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
393# unwanted failures due to missing protocol
def sanitized_Request(url, *args, **kwargs):
    """Build a Request, prefixing protocol-less ('//host/...') URLs with 'http:'"""
    if url.startswith('//'):
        url = 'http:%s' % url
    return compat_urllib_request.Request(url, *args, **kwargs)
397
398
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of every element, in
    their original order.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 406
912b38b4 407
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'.  Named entities and numeric
    (decimal or hexadecimal) references are converted; anything else,
    including numeric references that do not map to a character, is
    returned in its literal '&...;' form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/rg3/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Out-of-range code points raise ValueError; absurdly large
            # numbers raise OverflowError.  Keep the literal text for both.
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
430
431
def unescapeHTML(s):
    """Replace the HTML entities in s by their characters (None stays None)"""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', _decode_entity, s)
d77c3dfd 439
8bf48f23 440
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for file names and subprocess arguments"""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        chosen = preferredencoding()
    else:
        chosen = sys.getfilesystemencoding()
    return 'utf-8' if chosen is None else chosen
451
452
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess  Set when the result is passed to a subprocess

    Returns s unchanged wherever a Unicode API is used, otherwise s encoded
    with get_subprocess_encoding() (unencodable characters are dropped).
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API.
    # On Python 2, pass the unicode string directly to use Unicode APIs on
    # Windows 2000 and up (Detecting Windows NT 4 is tricky because
    # 'major >= 4' would match Windows 9x series as well. Besides, NT 4 is
    # obsolete.)
    uses_unicode_api = sys.version_info >= (3, 0) or (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    if uses_unicode_api:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
471
472
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string file name; anything else is returned unchanged.

    On Python 3 the argument is always returned as is.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 482
f07b74fc
PH
483
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(s, True)"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)

    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
491
492
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument via decodeFilename(b, True)"""
    return decodeFilename(b, for_subprocess=True)
495
496
8271226a
PH
def decodeOption(optval):
    """Return optval as text, decoding bytes with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is not None:
        if isinstance(optval, bytes):
            optval = optval.decode(preferredencoding())
        assert isinstance(optval, compat_str)
    return optval
1c256f70 505
5f6a1245 506
4539dd30
PH
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The longest unit shown is the largest one that is non-zero, so exactly
    one hour is '1:00:00' and exactly one minute is '1:00'.
    """
    # >= (not >) so that the exact boundaries 3600 and 60 roll over to the
    # next unit instead of being printed as '60:00' / '60'.
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
514
a0ddb8a2 515
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version.

    params is the options dict; its 'nocheckcertificate' entry (default
    False) disables certificate verification.  Extra keyword arguments are
    forwarded to YoutubeDLHTTPSHandler.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            # Fall through to the version-based selection below.
            pass

    if sys.version_info < (3, 2):
        # No SSL context is passed on these versions
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 539
732ea2f0 540
08f2a92c
JMF
def bug_reports_message():
    """Return the text asking the user to report a bug, with update advice"""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    parts = [
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ]
    return ''.join(parts)
550
551
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is appended to the message; video_id, if given, is prepended.
        """
        # Network and availability problems are never reported as bugs
        expected_types = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_types:
            expected = True

        full_msg = msg
        if video_id is not None:
            full_msg = video_id + ': ' + full_msg
        if cause:
            full_msg += ' (caused by %r)' % cause
        if not expected:
            full_msg += bug_reports_message()
        super(ExtractorError, self).__init__(full_msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as a string, or None if there is none"""
        if self.traceback is None:
            return None
        frames = traceback.format_tb(self.traceback)
        return ''.join(frames)
01951dda 579
1c256f70 580
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
586
587
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
591
592
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors.  It carries the appropriate error
    message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
605
606
class SameFileError(Exception):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
614
615
class PostProcessingError(Exception):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this exception to indicate
    an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Kept as an attribute for callers that read .msg directly
        self.msg = msg
d77c3dfd 625
5f6a1245 626
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
630
631
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
d77c3dfd
FV
639
640
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Give the exception a readable message instead of a bare tuple
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 653
5f6a1245 654
c5a59d93 655def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
656 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
657 # expected HTTP responses to meet HTTP/1.0 or later (see also
658 # https://github.com/rg3/youtube-dl/issues/6727)
659 if sys.version_info < (3, 0):
5a1a2e94 660 kwargs[b'strict'] = True
be4a824d
PH
661 hc = http_class(*args, **kwargs)
662 source_address = ydl_handler._params.get('source_address')
663 if source_address is not None:
664 sa = (source_address, 0)
665 if hasattr(hc, 'source_address'): # Python 2.7+
666 hc.source_address = sa
667 else: # Python 2.6
668 def _hc_connect(self, *args, **kwargs):
669 sock = compat_socket_create_connection(
670 (self.host, self.port), self.timeout, sa)
671 if is_https:
d7932313
PH
672 self.sock = ssl.wrap_socket(
673 sock, self.key_file, self.cert_file,
674 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
675 else:
676 self.sock = sock
677 hc.connect = functools.partial(_hc_connect, hc)
678
679 return hc
680
681
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, a new dict is returned without the marker
    and without any Accept-Encoding header; otherwise headers is returned
    as is.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    return dict(
        (name, value) for name, value in headers.items()
        if name != marker and name.lower() != 'accept-encoding')
87f0e62d
YCH
690
691
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params is the options dict; _create_http_connection reads
        # 'source_address' from it.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        # Open the connection through _create_http_connection (plain HTTP)
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying raw deflate first, then zlib-wrapped."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, setting .code by hand if the constructor takes no code."""
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Prepare an outgoing request: escape the URL and add the standard headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
            new_req = req_type(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
            new_req.timeout = req.timeout
            req = new_req

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Drops Accept-Encoding when the "Youtubedl-no-compression" marker is set
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress gzip/deflate bodies and percent-encode redirect locations."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: report the original failure
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 815
5de90176 816
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens its connections via _create_http_connection.

    params is the options dict; https_conn_class optionally replaces the
    default HTTPSConnection class.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Forward whichever SSL settings the base handler stored on self:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        extra = {}
        for option, attr in (('context', '_context'),
                             ('check_hostname', '_check_hostname')):
            if hasattr(self, attr):
                extra[option] = getattr(self, attr)

        connection_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(connection_factory, req, **extra)
be4a824d
PH
832
833
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor whose request/response hooks also serve HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # (The workaround below is currently disabled.)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base = compat_urllib_request.HTTPCookieProcessor
        return base.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
856
857
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    delimiter separates the date part from the time part.  timezone, if
    given, is a datetime.timedelta UTC offset; otherwise a trailing 'Z' or
    '+HH:MM' / '-HHMM' style offset is read from the string.  Returns None
    for None input or when the string does not parse.
    """

    if date_str is None:
        return None

    # Fractional seconds are dropped
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        tz_match = re.search(
            r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        # UTC unless an explicit offset is found
        timezone = datetime.timedelta()
        if tz_match:
            date_str = date_str[:-len(tz_match.group(0))]
            if tz_match.group('sign'):
                direction = 1 if tz_match.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=direction * int(tz_match.group('hours')),
                    minutes=direction * int(tz_match.group('minutes')))

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc_dt.timetuple())
    except ValueError:
        return None
912b38b4
PH
887
888
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    day_first selects day/month order for ambiguous numeric dates.
    Returns None for None input or when no known format matches.
    """

    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    unambiguous_formats = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    if day_first:
        ambiguous_formats = [
            '%d-%m-%Y',
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ]
    else:
        ambiguous_formats = [
            '%m-%d-%Y',
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ]

    upload_date = None
    # Every format is tried; a later match replaces an earlier one
    for fmt in unambiguous_formats + ambiguous_formats:
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 953
5f6a1245 954
def determine_ext(url, default_ext='unknown_video'):
    """ Guess the file extension from a URL, falling back to default_ext """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 966
5f6a1245 967
def subtitles_filename(filename, sub_lang, sub_format):
    """ Replace the last extension of filename with '<sub_lang>.<sub_format>' """
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
d4051a8e 970
5f6a1245 971
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Raises ValueError if date_str matches none of these forms."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: "\d" inside a plain literal is an invalid escape sequence
    match = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?',
        date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # timedelta has no month/year units; approximate with 30/365 days
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
5f6a1245
JW
999
1000
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    any other string is returned unchanged"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
1009
5f6a1245 1010
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; None leaves that side of the range unbounded"""
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a datetime.date or a date string) is in the range"""
        if isinstance(date, datetime.date):
            candidate = date
        else:
            candidate = date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
1040
1041
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
1050
1051
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE in the Win32 API)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        # Only stdout/stderr are handled here
        return False

    # Bind the kernel32 entry points by name with explicit prototypes
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b'GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32))
    # Out-parameter: WriteConsoleW stores the amount it wrote here
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b'GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is invalid, is not a character device
        # (ignoring the REMOTE flag), or GetConsoleMode fails on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        # Redirected output: let the caller write it the normal way
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping before the
        # next non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
1125
1126
def write_string(s, out=None, encoding=None):
    """ Write the text s to out (sys.stderr by default) and flush it """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream wrapping a binary one: encode ourselves, write raw bytes
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
1147
1148
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string into a list of integer byte values """
    if not bs:
        return []
    # Python 3 byte strings already yield ints when indexed
    if isinstance(bs[0], int):
        return list(bs)
    return [ord(ch) for ch in bs]
1156
c257baff 1157
def intlist_to_bytes(xs):
    """ Pack a list of integer byte values into a byte string """
    if xs:
        return struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
1162
1163
c1c9a79c
PH
# Cross-platform file locking
# Each branch below defines the same pair of helpers:
#   _lock_file(f, exclusive) and _unlock_file(f)
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # ctypes layout of the structure passed as the last argument of
    # LockFileEx / UnlockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count to lock, starting at offset 0
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock the open file f; raises OSError on failure """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so _unlock_file can pass the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 is LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Release the lock taken by _lock_file; raises OSError on failure """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """ Take an exclusive or shared flock() lock on f """
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """ Release the flock() lock on f """
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        # Without fcntl both helpers fail with IOError
        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
1237
1238
class locked_file(object):
    """ Context manager around a text file that holds a file lock while
    the with-block runs (shared for mode 'r', exclusive otherwise) """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leave the file open when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # The file is closed even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1268
1269
4644ac55
S
def get_filesystem_encoding():
    """ Return the filesystem encoding, defaulting to 'utf-8' when unknown """
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
1273
1274
def shell_quote(args):
    """ Return the arguments as one shell-escaped, space-separated string.

    Byte-string arguments are decoded with the filesystem encoding first.
    """
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        # shlex_quote (already imported from .compat) rather than
        # pipes.quote: the pipes module was removed in Python 3.13
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1284
1285
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # The data travels JSON-encoded in a query-string style URL fragment
    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
9d4660ca
PH
1292
1293
def unsmuggle_url(smug_url, default=None):
    """ Split a URL produced by smuggle_url into (url, data).

    URLs without smuggled data are returned as (smug_url, default). """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, sdata = smug_url.rpartition('#')
        payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
02dbf93f
PH
1301
1302
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count as a string with a binary (1024-based) suffix.

    Returns 'N/A' for None. A str argument is converted with float().
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the suffix table: values >= 1024**9 would otherwise raise
        # IndexError, and values below 1/1024 would yield a negative index
        # that silently selects a suffix from the end of the list
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
f53c966a 1315
1c088fa8 1316
be64b5b0
PH
1317def parse_filesize(s):
1318 if s is None:
1319 return None
1320
dfb1b146 1321 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
1322 # but we support those too
1323 _UNIT_TABLE = {
1324 'B': 1,
1325 'b': 1,
1326 'KiB': 1024,
1327 'KB': 1000,
1328 'kB': 1024,
1329 'Kb': 1000,
1330 'MiB': 1024 ** 2,
1331 'MB': 1000 ** 2,
1332 'mB': 1024 ** 2,
1333 'Mb': 1000 ** 2,
1334 'GiB': 1024 ** 3,
1335 'GB': 1000 ** 3,
1336 'gB': 1024 ** 3,
1337 'Gb': 1000 ** 3,
1338 'TiB': 1024 ** 4,
1339 'TB': 1000 ** 4,
1340 'tB': 1024 ** 4,
1341 'Tb': 1000 ** 4,
1342 'PiB': 1024 ** 5,
1343 'PB': 1000 ** 5,
1344 'pB': 1024 ** 5,
1345 'Pb': 1000 ** 5,
1346 'EiB': 1024 ** 6,
1347 'EB': 1000 ** 6,
1348 'eB': 1024 ** 6,
1349 'Eb': 1000 ** 6,
1350 'ZiB': 1024 ** 7,
1351 'ZB': 1000 ** 7,
1352 'zB': 1024 ** 7,
1353 'Zb': 1000 ** 7,
1354 'YiB': 1024 ** 8,
1355 'YB': 1000 ** 8,
1356 'yB': 1024 ** 8,
1357 'Yb': 1000 ** 8,
1358 }
1359
1360 units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
4349c07d
PH
1361 m = re.match(
1362 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
be64b5b0
PH
1363 if not m:
1364 return None
1365
4349c07d
PH
1366 num_str = m.group('num').replace(',', '.')
1367 mult = _UNIT_TABLE[m.group('unit')]
1368 return int(float(num_str) * mult)
be64b5b0
PH
1369
1370
caefb1de
PH
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """

    try:
        position = ENGLISH_MONTH_NAMES.index(name)
    except ValueError:
        return None
    return position + 1
1378
1379
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    # Abbreviations are the first three letters of the full month names
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
18258362
JMF
1388
1389
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML, leaving existing entities alone"""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
1396
1397
def setproctitle(title):
    """ Best-effort attempt to set the process name through libc's prctl.

    Silently does nothing when libc.so.6 cannot be loaded or has no prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    # Initialising the buffer from the bytes allocates one extra byte and
    # NUL-terminates it; a buffer sized to exactly len(title_bytes) carries
    # no terminator for prctl to stop at
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1411
1412
def remove_start(s, start):
    """ Return s without the given prefix, or s unchanged if it is absent """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1417
1418
2b9faf55
PH
def remove_end(s, end):
    """ Return s without the given suffix, or s unchanged if it is absent """
    # An empty suffix must be a no-op: s[:-0] is s[:0], i.e. ''
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1423
1424
31b2051e
S
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes surrounding s """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
1432
1433
def url_basename(url):
    """ Return the last segment of the path component of url """
    path = compat_urlparse.urlparse(url).path
    segments = path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
1437
1438
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always reported as HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
1442
1443
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to an int, returning default when that is not possible.

    If get_attr is given, the value is read from that attribute of v.
    The result is int(v) * invscale // scale.
    """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        # TypeError: v is of a type int() cannot convert at all (list, dict...)
        return default
9732d77e 1456
9572013d 1457
40a90862
JMF
def str_or_none(v, default=None):
    """ Convert v to compat_str, or return default when v is None """
    if v is None:
        return default
    return compat_str(v)
1460
9732d77e
PH
1461
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Drop thousands separators and plus signs before converting
    return int(re.sub(r'[,\.\+]', '', int_str))
608d11f5
PH
1468
1469
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, returning default when that is not possible.

    The result is float(v) * invscale / scale.
    """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        # TypeError: v is of a type float() cannot convert at all (list, dict...)
        return default
43f775e4
PH
1477
1478
def parse_duration(s):
    """ Parse a textual duration into a number of seconds (None if unparseable) """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None

    # A lone "N minutes" / "N hours" value may be fractional
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)

    seconds_per_unit = (
        ('secs', 1),
        ('mins_reversed', 60),
        ('mins', 60),
        ('hours', 60 * 60),
        ('hours_reversed', 60 * 60),
        ('days', 24 * 60 * 60),
    )
    res = 0
    for group_name, factor in seconds_per_unit:
        captured = m.group(group_name)
        if captured:
            res += int(captured) * factor
    # The fractional part turns the result into a float
    if m.group('ms'):
        res += float(m.group('ms'))
    return res
91d7d0b3
JMF
1523
1524
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext before the real extension of filename.

    When expected_real_ext is given and the real extension differs from
    it, ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
1531
1532
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the real extension of filename with ext.

    When expected_real_ext is given and the real extension differs from
    it, ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
1538
1539
d70ad093
PH
def check_executable(exe, args=()):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False when the binary cannot be started. """
    # Immutable default; list() also accepts tuples and other iterables
    cmd = [exe] + list(args)
    try:
        subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1548
1549
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = proc.communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
1563
1564
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version string from program output.

    Returns group 1 of the first version_re match, or unrecognized if
    there is none. """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
1574
1575
class PagedList(object):
    """ Base class for paged lists; len() relies on a getslice() method
    that this class itself does not define """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
1580
9c44d242
PH
1581
class OnDemandPagedList(PagedList):
    """ Paged list that calls pagefunc(pagenum) for each page it needs
    while a slice is being assembled """

    def __init__(self, pagefunc, pagesize):
        # pagefunc(pagenum) returns an iterable with the entries of that page
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list;
        end=None means up to the last entry """
        res = []
        # Begin at the page that contains index `start`
        for pagenum in itertools.count(start // self._pagesize):
            # Index range covered by this page: [firstid, nextfirstid)
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of `start` inside this page (0 for every later page)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Cut-off inside this page when `end` falls into it, else None
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1623
1624
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is known in advance """

    def __init__(self, pagefunc, pagecount, pagesize):
        # pagefunc(pagenum) returns an iterable with the entries of that page
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list;
        end=None means up to the last page """
        res = []
        start_page = start // self._pagesize
        # Exclusive upper bound of the pages to visit
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Entries to drop from the front of the first visited page
        skip_elems = start - start_page * self._pagesize
        # Entries still wanted; None means no limit
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                # Only the first visited page is trimmed at the front
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the request: truncate and stop
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1652
1653
def uppercase_escape(s):
    """ Decode every \\UXXXXXXXX escape sequence found in s """
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
0fe2ff78
YCH
1660
1661
def lowercase_escape(s):
    """ Decode every \\uXXXX escape sequence found in s """
    decode_escape = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
b53466e1 1668
d05cfe06
S
1669
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    # On Python 2 text strings are passed to quote() as UTF-8 bytes
    if on_python2 and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1675
1676
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Only these four components are escaped; the rest is kept as parsed
    escaped = parts._replace(
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
1686
# Probe whether struct accepts the format as a text string (string literals
# in this module are text because of unicode_literals)
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        """ struct.pack that also accepts a text format string """
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        """ struct.unpack that also accepts a text format string """
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # No workaround needed: expose the struct functions directly
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1703
1704
def read_batch_urls(batch_fd):
    """ Read the URLs from a batch file object, one per line, and close it.

    Blank lines and lines starting with '#', ';' or ']' are skipped.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # '\ufeff' is the BOM after a proper UTF-8 decode; '\xef\xbb\xbf' is
        # how the same three bytes appear when read as latin-1
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1719
1720
def urlencode_postdata(*args, **kargs):
    """ urlencode the arguments and return the result as ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1723
1724
def encode_dict(d, encoding='utf-8'):
    """ Return a copy of d with every string key and value encoded """
    def _encoded(item):
        if isinstance(item, compat_basestring):
            return item.encode(encoding)
        return item

    return dict((_encoded(key), _encoded(val)) for key, val in d.items())
16392824 1729
8e60dc75 1730
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """ Look up one key, or the first usable key from a list/tuple of keys.

    With several keys, missing keys and None values are always skipped;
    other false values are skipped unless skip_false_values is False. """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
1739
1740
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """ Return string as compat_str, decoding it with encoding if necessary """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
1743
16392824 1744
a1a530b0
PH
# Age limit associated with each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """ Parse an age limit such as '18', '18+' or a US rating label.

    Returns None for None and for strings that are neither. """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s)
146c80e2
S
1759
1760
def strip_jsonp(code):
    """ Remove a JSONP callback wrapper, leaving only the wrapped payload """
    jsonp_wrapper = r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_wrapper, r'\1', code)
478c2c61
PH
1764
1765
e05f6939
PH
def js_to_json(code):
    """ Rewrite JavaScript object-literal source text towards JSON:
    string literals and bare identifiers become double-quoted strings and
    trailing commas before a closing bracket or brace are removed """
    def fix_kv(m):
        # m is a single string literal or identifier matched below
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            # Valid JSON literals are kept untouched
            return v
        if v.startswith('"'):
            # Double-quoted: drop the quotes and unescape \' to '
            v = re.sub(r"\\'", "'", v[1:-1])
        elif v.startswith("'"):
            # Single-quoted: drop the quotes, keep \\ as is, unescape \'
            # and escape bare double quotes
            v = v[1:-1]
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        # Identifiers fall through unchanged and simply get quoted
        return '"%s"' % v

    # Alternatives: double-quoted string, single-quoted string, identifier
    # (identifiers may contain dots)
    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Remove a trailing comma in front of ] or }
    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
    return res
1789
1790
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The rank is the position in quality_ids; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1799
acd69589
PH
1800
# Default output filename template: '<title>-<id>.<ext>' in %-mapping form
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1802
a020a0dc
PH
1803
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # The ellipses count towards the allowed length
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1812
1813
def version_tuple(v):
    """ Split a version string on '.' and '-' into a tuple of ints """
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
1816
1817
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit.

    An empty or unparseable version counts as outdated only when
    assume_new is False. """
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        outdated = not assume_new
    return outdated
732ea2f0
PH
1825
1826
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updateable when this module was loaded by a zipimporter or sys.frozen is set
    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
1832
1833
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    quoted = [shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
1837
1838
def error_to_compat_str(err):
    """ Return the message of an exception as a text string """
    err_str = str(err)
    if sys.version_info[0] >= 3:
        return err_str
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return err_str.decode(preferredencoding())
1846
1847
def mimetype2ext(mt):
    """ Map a MIME type to a file extension.

    Full types with a special mapping are looked up first; otherwise the
    subtype (the part after the last '/') is translated through a small
    table and returned unchanged when it is not in that table.
    Returns None when mt is None (e.g. a missing Content-Type header).
    """
    if mt is None:
        return None

    ext = {
        'audio/mp4': 'm4a',
    }.get(mt)
    if ext is not None:
        return ext

    _, _, res = mt.rpartition('/')

    return {
        '3gpp': '3gp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-wmv': 'wmv',
    }.get(res, res)
1864
1865
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension from the headers of an opened URL handle:
    the Content-Disposition filename is tried first, then Content-Type """
    try:
        url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        def getheader(h):
            return url_handle.headers[h]

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1882
1883
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Build a base64 data: URI from bytes and a MIME type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
1886
1887
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1896
1897
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The UTF-32 marks are listed before the UTF-16 ones because the
    # UTF-32-LE mark begins with the UTF-16-LE mark.
    known_boms = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )

    # Without a byte order mark the data is decoded as UTF-8
    encoding = 'utf-8'
    payload = first_bytes
    for mark, mark_encoding in known_boms:
        if first_bytes.startswith(mark):
            encoding = mark_encoding
            payload = first_bytes[len(mark):]
            break

    text = payload.decode(encoding, 'replace')
    # Truthy match object when the first non-blank character is '<'
    return re.match(r'^\s*<', text)
a055469f
PH
1916
1917
def determine_protocol(info_dict):
    """Return the download protocol for ``info_dict``.

    An explicit ``protocol`` entry wins; otherwise the protocol is derived
    from the URL prefix, then from the URL's extension, and finally from
    the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1938
1939
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # Width of each column is the length of its longest cell
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*rows)]
    # Every column but the last is left-aligned and padded to width + 1
    padded_columns = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_columns) + '%s'
    return '\n'.join(row_format % tuple(row) for row in rows)
347de493
PH
1946
1947
1948def _match_one(filter_part, dct):
1949 COMPARISON_OPERATORS = {
1950 '<': operator.lt,
1951 '<=': operator.le,
1952 '>': operator.gt,
1953 '>=': operator.ge,
1954 '=': operator.eq,
1955 '!=': operator.ne,
1956 }
1957 operator_rex = re.compile(r'''(?x)\s*
1958 (?P<key>[a-z_]+)
1959 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1960 (?:
1961 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
1962 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
1963 )
1964 \s*$
1965 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
1966 m = operator_rex.search(filter_part)
1967 if m:
1968 op = COMPARISON_OPERATORS[m.group('op')]
1969 if m.group('strval') is not None:
1970 if m.group('op') not in ('=', '!='):
1971 raise ValueError(
1972 'Operator %s does not support string values!' % m.group('op'))
1973 comparison_value = m.group('strval')
1974 else:
1975 try:
1976 comparison_value = int(m.group('intval'))
1977 except ValueError:
1978 comparison_value = parse_filesize(m.group('intval'))
1979 if comparison_value is None:
1980 comparison_value = parse_filesize(m.group('intval') + 'B')
1981 if comparison_value is None:
1982 raise ValueError(
1983 'Invalid integer value %r in filter part %r' % (
1984 m.group('intval'), filter_part))
1985 actual_value = dct.get(m.group('key'))
1986 if actual_value is None:
1987 return m.group('none_inclusive')
1988 return op(actual_value, comparison_value)
1989
1990 UNARY_OPERATORS = {
1991 '': lambda v: v is not None,
1992 '!': lambda v: v is None,
1993 }
1994 operator_rex = re.compile(r'''(?x)\s*
1995 (?P<op>%s)\s*(?P<key>[a-z_]+)
1996 \s*$
1997 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
1998 m = operator_rex.search(filter_part)
1999 if m:
2000 op = UNARY_OPERATORS[m.group('op')]
2001 actual_value = dct.get(m.group('key'))
2002 return op(actual_value)
2003
2004 raise ValueError('Invalid filter part %r' % filter_part)
2005
2006
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # The '&'-separated parts are AND-ed together; evaluation stops at the
    # first part that does not match.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
2012
2013
def match_filter_func(filter_str):
    """Return a callable that checks an info dict against ``filter_str``.

    The callable returns None when the dict passes the filter, otherwise a
    message explaining that the video is skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
2022
2023
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float).

    Accepts a plain offset (``12.5`` or ``12.5s``) or a clock value
    (``H:MM:SS``, optionally followed by ``.`` or ``:`` and more digits,
    which are read as the fractional part of the seconds).  Returns None
    for empty or unrecognised input.
    """
    if not time_expr:
        return

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
2035
2036
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode (``HH:MM:SS,mmm``).

    The value is rounded to the nearest millisecond before being split
    into fields.  Truncating ``(seconds % 1) * 1000`` instead loses a
    millisecond to floating point error for inputs such as 2.3, which
    would be rendered as ``00:00:02,299``.
    """
    total_msec = int(round(seconds * 1000))
    total_secs, msec = divmod(total_msec, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
bf6427d2
YCH
2039
2040
def dfxp2srt(dfxp_data):
    """Convert a DFXP/TTML subtitle document (text) into SRT text.

    Paragraphs without a usable begin time, or with neither an end time
    nor a duration, are left out.  Raises ValueError when the document
    contains no paragraph elements.
    """
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
    })

    class TTMLPElementParser(object):
        # Parser target that flattens a paragraph to plain text, turning
        # line break elements into newlines.
        out = ''

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                self.out += '\n'

        def end(self, tag):
            pass

        def data(self, data):
            self.out += data

        def close(self):
            return self.out.strip()

    def parse_node(node):
        # Re-serialise the paragraph and feed it through the flattening target
        flattener = xml.etree.ElementTree.XMLParser(target=TTMLPElementParser())
        flattener.feed(xml.etree.ElementTree.tostring(node))
        return flattener.close()

    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    cues = []
    # The cue number is the paragraph's position, so skipped paragraphs
    # leave gaps in the numbering.
    for index, para in enumerate(paras, 1):
        attrs = para.attrib
        begin_time = parse_dfxp_time_expr(attrs.get('begin'))
        end_time = parse_dfxp_time_expr(attrs.get('end'))
        dur = parse_dfxp_time_expr(attrs.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # Fall back to begin + duration when no end time is given
            if not dur:
                continue
            end_time = begin_time + dur
        cues.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(cues)
2093
2094
66e289ba
S
def cli_option(params, command_option, param):
    """Return ``[command_option, value]`` for ``params[param]``, or ``[]`` when it is unset (None)."""
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
2098
2099
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render the boolean ``params[param]`` as a command line option.

    Returns ``[command_option + separator + value]`` when ``separator`` is
    given, ``[command_option, value]`` otherwise, where value is
    ``true_value`` or ``false_value``.  Returns ``[]`` when the parameter
    is unset (None), so that the option is simply left out.

    A value that is set but is not a bool still trips the assertion.
    """
    param = params.get(param)
    if param is None:
        # Unset parameter: emit nothing instead of failing the assertion
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
2106
2107
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return ``[command_option]`` when ``params[param]`` equals ``expected_value``, else ``[]``."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
2111
2112
def cli_configuration_args(params, param, default=[]):
    """Return the extra command line arguments stored in ``params[param]``.

    When the parameter is unset (None), ``default`` is returned.  A list
    default is copied first, so that a caller extending the result cannot
    alter the shared default list and thereby affect later calls.

    A value that is set but is not a list trips the assertion.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
2119
2120
39672624
YCH
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # Two-letter code -> three-letter code
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are looked up, so a longer tag such
        # as 'en-US' resolves through its 'en' prefix.
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup over the table; None when the code is unknown
        return next(
            (short_name for short_name, long_name in cls._lang_map.items()
             if long_name == code),
            None)
2321
2322
4eb10f66
YCH
2323class ISO3166Utils(object):
2324 # From http://data.okfn.org/data/core/country-list
2325 _country_map = {
2326 'AF': 'Afghanistan',
2327 'AX': 'Åland Islands',
2328 'AL': 'Albania',
2329 'DZ': 'Algeria',
2330 'AS': 'American Samoa',
2331 'AD': 'Andorra',
2332 'AO': 'Angola',
2333 'AI': 'Anguilla',
2334 'AQ': 'Antarctica',
2335 'AG': 'Antigua and Barbuda',
2336 'AR': 'Argentina',
2337 'AM': 'Armenia',
2338 'AW': 'Aruba',
2339 'AU': 'Australia',
2340 'AT': 'Austria',
2341 'AZ': 'Azerbaijan',
2342 'BS': 'Bahamas',
2343 'BH': 'Bahrain',
2344 'BD': 'Bangladesh',
2345 'BB': 'Barbados',
2346 'BY': 'Belarus',
2347 'BE': 'Belgium',
2348 'BZ': 'Belize',
2349 'BJ': 'Benin',
2350 'BM': 'Bermuda',
2351 'BT': 'Bhutan',
2352 'BO': 'Bolivia, Plurinational State of',
2353 'BQ': 'Bonaire, Sint Eustatius and Saba',
2354 'BA': 'Bosnia and Herzegovina',
2355 'BW': 'Botswana',
2356 'BV': 'Bouvet Island',
2357 'BR': 'Brazil',
2358 'IO': 'British Indian Ocean Territory',
2359 'BN': 'Brunei Darussalam',
2360 'BG': 'Bulgaria',
2361 'BF': 'Burkina Faso',
2362 'BI': 'Burundi',
2363 'KH': 'Cambodia',
2364 'CM': 'Cameroon',
2365 'CA': 'Canada',
2366 'CV': 'Cape Verde',
2367 'KY': 'Cayman Islands',
2368 'CF': 'Central African Republic',
2369 'TD': 'Chad',
2370 'CL': 'Chile',
2371 'CN': 'China',
2372 'CX': 'Christmas Island',
2373 'CC': 'Cocos (Keeling) Islands',
2374 'CO': 'Colombia',
2375 'KM': 'Comoros',
2376 'CG': 'Congo',
2377 'CD': 'Congo, the Democratic Republic of the',
2378 'CK': 'Cook Islands',
2379 'CR': 'Costa Rica',
2380 'CI': 'Côte d\'Ivoire',
2381 'HR': 'Croatia',
2382 'CU': 'Cuba',
2383 'CW': 'Curaçao',
2384 'CY': 'Cyprus',
2385 'CZ': 'Czech Republic',
2386 'DK': 'Denmark',
2387 'DJ': 'Djibouti',
2388 'DM': 'Dominica',
2389 'DO': 'Dominican Republic',
2390 'EC': 'Ecuador',
2391 'EG': 'Egypt',
2392 'SV': 'El Salvador',
2393 'GQ': 'Equatorial Guinea',
2394 'ER': 'Eritrea',
2395 'EE': 'Estonia',
2396 'ET': 'Ethiopia',
2397 'FK': 'Falkland Islands (Malvinas)',
2398 'FO': 'Faroe Islands',
2399 'FJ': 'Fiji',
2400 'FI': 'Finland',
2401 'FR': 'France',
2402 'GF': 'French Guiana',
2403 'PF': 'French Polynesia',
2404 'TF': 'French Southern Territories',
2405 'GA': 'Gabon',
2406 'GM': 'Gambia',
2407 'GE': 'Georgia',
2408 'DE': 'Germany',
2409 'GH': 'Ghana',
2410 'GI': 'Gibraltar',
2411 'GR': 'Greece',
2412 'GL': 'Greenland',
2413 'GD': 'Grenada',
2414 'GP': 'Guadeloupe',
2415 'GU': 'Guam',
2416 'GT': 'Guatemala',
2417 'GG': 'Guernsey',
2418 'GN': 'Guinea',
2419 'GW': 'Guinea-Bissau',
2420 'GY': 'Guyana',
2421 'HT': 'Haiti',
2422 'HM': 'Heard Island and McDonald Islands',
2423 'VA': 'Holy See (Vatican City State)',
2424 'HN': 'Honduras',
2425 'HK': 'Hong Kong',
2426 'HU': 'Hungary',
2427 'IS': 'Iceland',
2428 'IN': 'India',
2429 'ID': 'Indonesia',
2430 'IR': 'Iran, Islamic Republic of',
2431 'IQ': 'Iraq',
2432 'IE': 'Ireland',
2433 'IM': 'Isle of Man',
2434 'IL': 'Israel',
2435 'IT': 'Italy',
2436 'JM': 'Jamaica',
2437 'JP': 'Japan',
2438 'JE': 'Jersey',
2439 'JO': 'Jordan',
2440 'KZ': 'Kazakhstan',
2441 'KE': 'Kenya',
2442 'KI': 'Kiribati',
2443 'KP': 'Korea, Democratic People\'s Republic of',
2444 'KR': 'Korea, Republic of',
2445 'KW': 'Kuwait',
2446 'KG': 'Kyrgyzstan',
2447 'LA': 'Lao People\'s Democratic Republic',
2448 'LV': 'Latvia',
2449 'LB': 'Lebanon',
2450 'LS': 'Lesotho',
2451 'LR': 'Liberia',
2452 'LY': 'Libya',
2453 'LI': 'Liechtenstein',
2454 'LT': 'Lithuania',
2455 'LU': 'Luxembourg',
2456 'MO': 'Macao',
2457 'MK': 'Macedonia, the Former Yugoslav Republic of',
2458 'MG': 'Madagascar',
2459 'MW': 'Malawi',
2460 'MY': 'Malaysia',
2461 'MV': 'Maldives',
2462 'ML': 'Mali',
2463 'MT': 'Malta',
2464 'MH': 'Marshall Islands',
2465 'MQ': 'Martinique',
2466 'MR': 'Mauritania',
2467 'MU': 'Mauritius',
2468 'YT': 'Mayotte',
2469 'MX': 'Mexico',
2470 'FM': 'Micronesia, Federated States of',
2471 'MD': 'Moldova, Republic of',
2472 'MC': 'Monaco',
2473 'MN': 'Mongolia',
2474 'ME': 'Montenegro',
2475 'MS': 'Montserrat',
2476 'MA': 'Morocco',
2477 'MZ': 'Mozambique',
2478 'MM': 'Myanmar',
2479 'NA': 'Namibia',
2480 'NR': 'Nauru',
2481 'NP': 'Nepal',
2482 'NL': 'Netherlands',
2483 'NC': 'New Caledonia',
2484 'NZ': 'New Zealand',
2485 'NI': 'Nicaragua',
2486 'NE': 'Niger',
2487 'NG': 'Nigeria',
2488 'NU': 'Niue',
2489 'NF': 'Norfolk Island',
2490 'MP': 'Northern Mariana Islands',
2491 'NO': 'Norway',
2492 'OM': 'Oman',
2493 'PK': 'Pakistan',
2494 'PW': 'Palau',
2495 'PS': 'Palestine, State of',
2496 'PA': 'Panama',
2497 'PG': 'Papua New Guinea',
2498 'PY': 'Paraguay',
2499 'PE': 'Peru',
2500 'PH': 'Philippines',
2501 'PN': 'Pitcairn',
2502 'PL': 'Poland',
2503 'PT': 'Portugal',
2504 'PR': 'Puerto Rico',
2505 'QA': 'Qatar',
2506 'RE': 'Réunion',
2507 'RO': 'Romania',
2508 'RU': 'Russian Federation',
2509 'RW': 'Rwanda',
2510 'BL': 'Saint Barthélemy',
2511 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
2512 'KN': 'Saint Kitts and Nevis',
2513 'LC': 'Saint Lucia',
2514 'MF': 'Saint Martin (French part)',
2515 'PM': 'Saint Pierre and Miquelon',
2516 'VC': 'Saint Vincent and the Grenadines',
2517 'WS': 'Samoa',
2518 'SM': 'San Marino',
2519 'ST': 'Sao Tome and Principe',
2520 'SA': 'Saudi Arabia',
2521 'SN': 'Senegal',
2522 'RS': 'Serbia',
2523 'SC': 'Seychelles',
2524 'SL': 'Sierra Leone',
2525 'SG': 'Singapore',
2526 'SX': 'Sint Maarten (Dutch part)',
2527 'SK': 'Slovakia',
2528 'SI': 'Slovenia',
2529 'SB': 'Solomon Islands',
2530 'SO': 'Somalia',
2531 'ZA': 'South Africa',
2532 'GS': 'South Georgia and the South Sandwich Islands',
2533 'SS': 'South Sudan',
2534 'ES': 'Spain',
2535 'LK': 'Sri Lanka',
2536 'SD': 'Sudan',
2537 'SR': 'Suriname',
2538 'SJ': 'Svalbard and Jan Mayen',
2539 'SZ': 'Swaziland',
2540 'SE': 'Sweden',
2541 'CH': 'Switzerland',
2542 'SY': 'Syrian Arab Republic',
2543 'TW': 'Taiwan, Province of China',
2544 'TJ': 'Tajikistan',
2545 'TZ': 'Tanzania, United Republic of',
2546 'TH': 'Thailand',
2547 'TL': 'Timor-Leste',
2548 'TG': 'Togo',
2549 'TK': 'Tokelau',
2550 'TO': 'Tonga',
2551 'TT': 'Trinidad and Tobago',
2552 'TN': 'Tunisia',
2553 'TR': 'Turkey',
2554 'TM': 'Turkmenistan',
2555 'TC': 'Turks and Caicos Islands',
2556 'TV': 'Tuvalu',
2557 'UG': 'Uganda',
2558 'UA': 'Ukraine',
2559 'AE': 'United Arab Emirates',
2560 'GB': 'United Kingdom',
2561 'US': 'United States',
2562 'UM': 'United States Minor Outlying Islands',
2563 'UY': 'Uruguay',
2564 'UZ': 'Uzbekistan',
2565 'VU': 'Vanuatu',
2566 'VE': 'Venezuela, Bolivarian Republic of',
2567 'VN': 'Viet Nam',
2568 'VG': 'Virgin Islands, British',
2569 'VI': 'Virgin Islands, U.S.',
2570 'WF': 'Wallis and Futuna',
2571 'EH': 'Western Sahara',
2572 'YE': 'Yemen',
2573 'ZM': 'Zambia',
2574 'ZW': 'Zimbabwe',
2575 }
2576
2577 @classmethod
2578 def short2full(cls, code):
2579 """Convert an ISO 3166-2 country code to the corresponding full name"""
2580 return cls._country_map.get(code.upper())
2581
2582
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets an individual request choose its own proxy.

    A request may carry a 'Ytdl-request-proxy' header; its value replaces
    the proxy that would otherwise be used for that request, and the
    special value '__noproxy__' disables proxying for it.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        # Each handler passes the '__noproxy__' marker by default, so that
        # proxy_open() is still called (and can honour the per-request
        # header) when no proxy applies.  The loop values are bound as
        # lambda defaults so every handler keeps its own scheme.
        # NOTE(review): these are installed before the base initializer
        # runs, presumably so that it can replace them for configured
        # schemes -- confirm against the ProxyHandler implementation.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy overrides the one handed in; the header is
        # removed from the request once it has been read.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
2602
2603
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    # The byte order of the data is reversed before it is read as one
    # big hexadecimal number.
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)