#!/usr/bin/env python
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import contextlib
import ctypes
import datetime
import email.utils
import errno
import functools
import gzip
import io
import itertools
import json
import locale
import math
import operator
import os
import pipes
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParser,
    compat_basestring,
    compat_chr,
    compat_etree_fromstring,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_socket_create_connection,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))

std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


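# Illustrative usage sketch: write_json_file() serialises to a temporary file in
# the target directory and renames it over the destination, so readers never see
# a half-written file. The file name below is a placeholder.
def _example_write_json_file():
    write_json_file({'downloaded': 3, 'status': 'ok'}, 'state.json')

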
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)


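# Illustrative usage sketch: xpath_with_ns() expands "prefix:tag" steps into
# ElementTree's "{namespace-uri}tag" form. The MRSS prefix/URI pair below is an
# example mapping only.
def _example_xpath_with_ns():
    ns_map = {'media': 'http://search.yahoo.com/mrss/'}
    path = xpath_with_ns('media:group/media:title', ns_map)
    assert path == '{http://search.yahoo.com/mrss/}group/{http://search.yahoo.com/mrss/}title'

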
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
         \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist


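# Illustrative usage sketch: the class helpers match a whole class token inside
# the class attribute, so 'title' also matches class="title main". The HTML
# snippet is an example only.
def _example_get_elements_by_class():
    html = '<div class="title main">Hello</div><div class="title">World</div>'
    assert get_element_by_class('title', html) == 'Hello'
    assert get_elements_by_class('title', html) == ['Hello', 'World']

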
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    parser.feed(html_element)
    parser.close()
    return parser.attrs


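# Illustrative usage sketch: extract_attributes() turns a single opening tag into
# a dict, keeping valueless attributes as None. The tag below is an example only.
def _example_extract_attributes():
    attrs = extract_attributes('<video src="/clip.mp4" controls data-id="42">')
    assert attrs == {'src': '/clip.mp4', 'controls': None, 'data-id': '42'}

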
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result


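# Illustrative usage sketch: in restricted mode path separators and
# shell-unfriendly characters are replaced or dropped and whitespace becomes
# underscores. The input string is an example only.
def _example_sanitize_filename():
    assert sanitize_filename('Foo: Bar? "Baz"', restricted=True) == 'Foo_-_Bar_Baz'

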
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)


# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
# unwanted failures due to missing protocol
def sanitize_url(url):
    return 'http:%s' % url if url.startswith('//') else url


def sanitized_Request(url, *args, **kwargs):
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/rg3/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


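# Illustrative usage sketch: unescapeHTML() resolves named, decimal and
# hexadecimal entities via _htmlentity_transform().
def _example_unescapeHTML():
    assert unescapeHTML('Tom &amp; Jerry&#x27;s &quot;hit&quot;') == 'Tom & Jerry\'s "hit"'

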
def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    return decodeFilename(b, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


def formatSeconds(secs):
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs


def make_HTTPS_handler(params, **kwargs):
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message():
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    msg = '; please report this issue on https://yt-dl.org/bug .'
    msg += ' Make sure you are using the latest version; %s.' % update_cmd
    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
    return msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    pass


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """
    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg


class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
                'No space left' in self.msg or 'Disk quota excedded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/rg3/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs[b'strict'] = True
    hc = http_class(*args, **kwargs)
    source_address = ydl_handler._params.get('source_address')
    if source_address is not None:
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = compat_socket_create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)


class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response


def extract_timezone(date_str):
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        pass


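# Illustrative usage sketch: extract_timezone() strips a trailing UTC offset and
# parse_iso8601() converts the remainder to an epoch timestamp (1488281400 is
# 2017-02-28 11:30:00 UTC).
def _example_parse_iso8601():
    assert parse_iso8601('2017-02-28T12:30:00+01:00') == 1488281400

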
def date_formats(day_first=True):
    return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)


def unified_timestamp(date_str, day_first=True):
    if date_str is None:
        return None

    date_str = date_str.replace(',', ' ')

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600


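# Illustrative usage sketch: unified_strdate() normalises many textual date
# formats to YYYYMMDD; day_first controls how ambiguous numeric dates such as
# 11/12/2017 are read.
def _example_unified_strdate():
    assert unified_strdate('December 11, 2017') == '20171211'
    assert unified_strdate('11/12/2017', day_first=True) == '20171211'

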
def determine_ext(url, default_ext='unknown_video'):
    if url is None:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    else:
        return default_ext


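# Illustrative usage sketch: determine_ext() guesses the extension from the URL
# path and falls back to default_ext. The URLs below are examples only.
def _example_determine_ext():
    assert determine_ext('http://example.com/video.mp4?download=1') == 'mp4'
    assert determine_ext('http://example.com/stream') == 'unknown_video'

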
def subtitles_filename(filename, sub_lang, sub_format):
    return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format


def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()


def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
    else:
        return date_str


class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())


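# Illustrative usage sketch: date_from_str() accepts relative expressions and
# DateRange() implements containment checks, which is roughly how the --date,
# --datebefore and --dateafter options are evaluated.
def _example_date_range():
    assert date_from_str('now-1week') == datetime.date.today() - datetime.timedelta(days=7)
    assert '20170115' in DateRange('20170101', '20170131')

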
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res


def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b'GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b'GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True


def write_string(s, out=None, encoding=None):
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()


def bytes_to_intlist(bs):
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]


def intlist_to_bytes(xs):
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)


# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)


class locked_file(object):
    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)


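# Illustrative usage sketch: locked_file wraps io.open() with the platform lock
# helpers above; writes hold an exclusive lock for the duration of the with
# block. The archive file name and record are placeholders.
def _example_locked_file():
    with locked_file('download.archive', 'a', encoding='utf-8') as archive:
        archive.write('youtube abc123xyz\n')

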
def get_filesystem_encoding():
    encoding = sys.getfilesystemencoding()
    return encoding if encoding is not None else 'utf-8'


def shell_quote(args):
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(pipes.quote(a))
    return ' '.join(quoted_args)


def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata


def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data


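# Illustrative usage sketch: smuggle_url() and unsmuggle_url() round-trip
# extractor-internal data through the URL fragment. The URL and payload below
# are examples only.
def _example_smuggle_url():
    smuggled = smuggle_url('http://example.com/video', {'force_videoid': '42'})
    url, data = unsmuggle_url(smuggled)
    assert url == 'http://example.com/video'
    assert data == {'force_videoid': '42'}

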
def format_bytes(bytes):
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)


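# Illustrative usage sketch: format_bytes() renders byte counts with binary
# (1024-based) suffixes.
def _example_format_bytes():
    assert format_bytes(1536) == '1.50KiB'
    assert format_bytes(None) == 'N/A'

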
fb47597b
S
1565def lookup_unit_table(unit_table, s):
1566 units_re = '|'.join(re.escape(u) for u in unit_table)
1567 m = re.match(
782b1b5b 1568 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
1569 if not m:
1570 return None
1571 num_str = m.group('num').replace(',', '.')
1572 mult = unit_table[m.group('unit')]
1573 return int(float(num_str) * mult)
1574
1575
be64b5b0
PH
1576def parse_filesize(s):
1577 if s is None:
1578 return None
1579
dfb1b146 1580 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
1581 # but we support those too
1582 _UNIT_TABLE = {
1583 'B': 1,
1584 'b': 1,
70852b47 1585 'bytes': 1,
be64b5b0
PH
1586 'KiB': 1024,
1587 'KB': 1000,
1588 'kB': 1024,
1589 'Kb': 1000,
13585d76 1590 'kb': 1000,
70852b47
YCH
1591 'kilobytes': 1000,
1592 'kibibytes': 1024,
be64b5b0
PH
1593 'MiB': 1024 ** 2,
1594 'MB': 1000 ** 2,
1595 'mB': 1024 ** 2,
1596 'Mb': 1000 ** 2,
13585d76 1597 'mb': 1000 ** 2,
70852b47
YCH
1598 'megabytes': 1000 ** 2,
1599 'mebibytes': 1024 ** 2,
be64b5b0
PH
1600 'GiB': 1024 ** 3,
1601 'GB': 1000 ** 3,
1602 'gB': 1024 ** 3,
1603 'Gb': 1000 ** 3,
13585d76 1604 'gb': 1000 ** 3,
70852b47
YCH
1605 'gigabytes': 1000 ** 3,
1606 'gibibytes': 1024 ** 3,
be64b5b0
PH
1607 'TiB': 1024 ** 4,
1608 'TB': 1000 ** 4,
1609 'tB': 1024 ** 4,
1610 'Tb': 1000 ** 4,
13585d76 1611 'tb': 1000 ** 4,
70852b47
YCH
1612 'terabytes': 1000 ** 4,
1613 'tebibytes': 1024 ** 4,
be64b5b0
PH
1614 'PiB': 1024 ** 5,
1615 'PB': 1000 ** 5,
1616 'pB': 1024 ** 5,
1617 'Pb': 1000 ** 5,
13585d76 1618 'pb': 1000 ** 5,
70852b47
YCH
1619 'petabytes': 1000 ** 5,
1620 'pebibytes': 1024 ** 5,
be64b5b0
PH
1621 'EiB': 1024 ** 6,
1622 'EB': 1000 ** 6,
1623 'eB': 1024 ** 6,
1624 'Eb': 1000 ** 6,
13585d76 1625 'eb': 1000 ** 6,
70852b47
YCH
1626 'exabytes': 1000 ** 6,
1627 'exbibytes': 1024 ** 6,
be64b5b0
PH
1628 'ZiB': 1024 ** 7,
1629 'ZB': 1000 ** 7,
1630 'zB': 1024 ** 7,
1631 'Zb': 1000 ** 7,
13585d76 1632 'zb': 1000 ** 7,
70852b47
YCH
1633 'zettabytes': 1000 ** 7,
1634 'zebibytes': 1024 ** 7,
be64b5b0
PH
1635 'YiB': 1024 ** 8,
1636 'YB': 1000 ** 8,
1637 'yB': 1024 ** 8,
1638 'Yb': 1000 ** 8,
13585d76 1639 'yb': 1000 ** 8,
70852b47
YCH
1640 'yottabytes': 1000 ** 8,
1641 'yobibytes': 1024 ** 8,
be64b5b0
PH
1642 }
1643
fb47597b
S
1644 return lookup_unit_table(_UNIT_TABLE, s)
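# Illustrative values for parse_filesize() (a sketch; note that binary (KiB)
# and decimal (MB) prefixes map to different multipliers):
#   >>> parse_filesize('1.2MiB')
#   1258291
#   >>> parse_filesize('5 MB')
#   5000000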
1645
1646
1647def parse_count(s):
1648 if s is None:
be64b5b0
PH
1649 return None
1650
fb47597b
S
1651 s = s.strip()
1652
1653 if re.match(r'^[\d,.]+$', s):
1654 return str_to_int(s)
1655
1656 _UNIT_TABLE = {
1657 'k': 1000,
1658 'K': 1000,
1659 'm': 1000 ** 2,
1660 'M': 1000 ** 2,
1661 'kk': 1000 ** 2,
1662 'KK': 1000 ** 2,
1663 }
be64b5b0 1664
fb47597b 1665 return lookup_unit_table(_UNIT_TABLE, s)
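# Illustrative values for parse_count() (a sketch):
#   >>> parse_count('1,000')
#   1000
#   >>> parse_count('1.2M')
#   1200000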
be64b5b0 1666
2f7ae819 1667
a942d6cb 1668def month_by_name(name, lang='en'):
caefb1de
PH
1669 """ Return the number of a month by its (locale-independent) name in the given language """
1670
f6717dec 1671 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 1672
caefb1de 1673 try:
f6717dec 1674 return month_names.index(name) + 1
7105440c
YCH
1675 except ValueError:
1676 return None
1677
1678
1679def month_by_abbreviation(abbrev):
1680 """ Return the number of a month by its (locale-independent) English
1681 abbreviation """
1682
1683 try:
1684 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
1685 except ValueError:
1686 return None
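# Illustrative values for the month helpers above (a sketch):
#   >>> month_by_name('May')
#   5
#   >>> month_by_name('mars', lang='fr')
#   3
#   >>> month_by_abbreviation('Sep')
#   9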
18258362
JMF
1687
1688
5aafe895 1689def fix_xml_ampersands(xml_str):
18258362 1690 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
1691 return re.sub(
1692 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 1693 '&amp;',
5aafe895 1694 xml_str)
e3946f98
PH
1695
1696
1697def setproctitle(title):
8bf48f23 1698 assert isinstance(title, compat_str)
c1c05c67
YCH
1699
1700 # ctypes in Jython is not complete
1701 # http://bugs.jython.org/issue2148
1702 if sys.platform.startswith('java'):
1703 return
1704
e3946f98 1705 try:
611c1dd9 1706 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
1707 except OSError:
1708 return
2f49bcd6
RC
1709 except TypeError:
1710 # LoadLibrary in Windows Python 2.7.13 only expects
1711 # a bytestring, but since unicode_literals turns
1712 # every string into a unicode string, it fails.
1713 return
6eefe533
PH
1714 title_bytes = title.encode('utf-8')
1715 buf = ctypes.create_string_buffer(len(title_bytes))
1716 buf.value = title_bytes
e3946f98 1717 try:
6eefe533 1718 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
1719 except AttributeError:
1720 return # Strange libc, just skip this
d7dda168
PH
1721
1722
1723def remove_start(s, start):
46bc9b7d 1724 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
1725
1726
2b9faf55 1727def remove_end(s, end):
46bc9b7d 1728 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
1729
1730
31b2051e
S
1731def remove_quotes(s):
1732 if s is None or len(s) < 2:
1733 return s
1734 for quote in ('"', "'", ):
1735 if s[0] == quote and s[-1] == quote:
1736 return s[1:-1]
1737 return s
1738
1739
29eb5174 1740def url_basename(url):
9b8aaeed 1741 path = compat_urlparse.urlparse(url).path
28e614de 1742 return path.strip('/').split('/')[-1]
aa94a6d3
PH
1743
1744
02dc0a36
S
1745def base_url(url):
1746 return re.match(r'https?://[^?#&]+/', url).group()
1747
1748
e34c3361
S
1749def urljoin(base, path):
1750 if not isinstance(path, compat_str) or not path:
1751 return None
b0c65c67 1752 if re.match(r'^(?:https?:)?//', path):
e34c3361 1753 return path
b0c65c67 1754 if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
e34c3361
S
1755 return None
1756 return compat_urlparse.urljoin(base, path)
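# Illustrative values for url_basename(), base_url() and urljoin() (a sketch
# with made-up URLs):
#   >>> url_basename('http://example.com/a/b/video.mp4?x=1')
#   'video.mp4'
#   >>> base_url('http://example.com/a/b/playlist.m3u8')
#   'http://example.com/a/b/'
#   >>> urljoin('http://example.com/a/', 'b/c.ts')
#   'http://example.com/a/b/c.ts'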
1757
1758
aa94a6d3
PH
1759class HEADRequest(compat_urllib_request.Request):
1760 def get_method(self):
611c1dd9 1761 return 'HEAD'
7217e148
PH
1762
1763
95cf60e8
S
1764class PUTRequest(compat_urllib_request.Request):
1765 def get_method(self):
1766 return 'PUT'
1767
1768
9732d77e 1769def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
1770 if get_attr:
1771 if v is not None:
1772 v = getattr(v, get_attr, None)
9572013d
PH
1773 if v == '':
1774 v = None
1812afb7
S
1775 if v is None:
1776 return default
1777 try:
1778 return int(v) * invscale // scale
1779 except ValueError:
af98f8ff 1780 return default
9732d77e 1781
9572013d 1782
40a90862
JMF
1783def str_or_none(v, default=None):
1784 return default if v is None else compat_str(v)
1785
9732d77e
PH
1786
1787def str_to_int(int_str):
48d4681e 1788 """ A more relaxed version of int_or_none """
9732d77e
PH
1789 if int_str is None:
1790 return None
28e614de 1791 int_str = re.sub(r'[,\.\+]', '', int_str)
9732d77e 1792 return int(int_str)
608d11f5
PH
1793
1794
9732d77e 1795def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
1796 if v is None:
1797 return default
1798 try:
1799 return float(v) * invscale / scale
1800 except ValueError:
1801 return default
43f775e4
PH
1802
1803
b72b4431
S
1804def strip_or_none(v):
1805 return None if v is None else v.strip()
1806
1807
608d11f5 1808def parse_duration(s):
8f9312c3 1809 if not isinstance(s, compat_basestring):
608d11f5
PH
1810 return None
1811
ca7b3246
S
1812 s = s.strip()
1813
acaff495 1814 days, hours, mins, secs, ms = [None] * 5
15846398 1815 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 1816 if m:
1817 days, hours, mins, secs, ms = m.groups()
1818 else:
1819 m = re.match(
1820 r'''(?ix)(?:P?T)?
8f4b58d7 1821 (?:
acaff495 1822 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 1823 )?
acaff495 1824 (?:
1825 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
1826 )?
1827 (?:
1828 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
1829 )?
1830 (?:
1831 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 1832 )?Z?$''', s)
acaff495 1833 if m:
1834 days, hours, mins, secs, ms = m.groups()
1835 else:
15846398 1836 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 1837 if m:
1838 hours, mins = m.groups()
1839 else:
1840 return None
1841
1842 duration = 0
1843 if secs:
1844 duration += float(secs)
1845 if mins:
1846 duration += float(mins) * 60
1847 if hours:
1848 duration += float(hours) * 60 * 60
1849 if days:
1850 duration += float(days) * 24 * 60 * 60
1851 if ms:
1852 duration += float(ms)
1853 return duration
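# Illustrative values for parse_duration() (a sketch covering the supported
# notations):
#   >>> parse_duration('1:02:03.5')
#   3723.5
#   >>> parse_duration('PT1H30M')
#   5400.0
#   >>> parse_duration('3 min')
#   180.0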
91d7d0b3
JMF
1854
1855
e65e4c88 1856def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 1857 name, real_ext = os.path.splitext(filename)
e65e4c88
S
1858 return (
1859 '{0}.{1}{2}'.format(name, ext, real_ext)
1860 if not expected_real_ext or real_ext[1:] == expected_real_ext
1861 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
1862
1863
b3ed15b7
S
1864def replace_extension(filename, ext, expected_real_ext=None):
1865 name, real_ext = os.path.splitext(filename)
1866 return '{0}.{1}'.format(
1867 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
1868 ext)
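# Illustrative values for prepend_extension() and replace_extension()
# (a sketch):
#   >>> prepend_extension('video.mp4', 'temp')
#   'video.temp.mp4'
#   >>> replace_extension('video.mp4', 'mkv')
#   'video.mkv'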
1869
1870
d70ad093
PH
1871def check_executable(exe, args=[]):
1872 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1873 args can be a list of arguments for a short output (like -version) """
1874 try:
1875 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1876 except OSError:
1877 return False
1878 return exe
b7ab0590
PH
1879
1880
95807118 1881def get_exe_version(exe, args=['--version'],
cae97f65 1882 version_re=None, unrecognized='present'):
95807118
PH
1883 """ Returns the version of the specified executable,
1884 or False if the executable is not present """
1885 try:
b64d04c1
YCH
1886 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
1887 # SIGTTOU if youtube-dl is run in the background.
1888 # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
cae97f65 1889 out, _ = subprocess.Popen(
54116803 1890 [encodeArgument(exe)] + args,
00ca7552 1891 stdin=subprocess.PIPE,
95807118
PH
1892 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
1893 except OSError:
1894 return False
cae97f65
PH
1895 if isinstance(out, bytes): # Python 2.x
1896 out = out.decode('ascii', 'ignore')
1897 return detect_exe_version(out, version_re, unrecognized)
1898
1899
1900def detect_exe_version(output, version_re=None, unrecognized='present'):
1901 assert isinstance(output, compat_str)
1902 if version_re is None:
1903 version_re = r'version\s+([-0-9._a-zA-Z]+)'
1904 m = re.search(version_re, output)
95807118
PH
1905 if m:
1906 return m.group(1)
1907 else:
1908 return unrecognized
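# Illustrative value for detect_exe_version() (a sketch; the version banner
# below is a made-up example of what an executable might print):
#   >>> detect_exe_version('ffmpeg version 3.2.4 Copyright (c) 2000-2017')
#   '3.2.4'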
1909
1910
b7ab0590 1911class PagedList(object):
dd26ced1
PH
1912 def __len__(self):
1913 # This is only useful for tests
1914 return len(self.getslice())
1915
9c44d242
PH
1916
1917class OnDemandPagedList(PagedList):
b95dc034 1918 def __init__(self, pagefunc, pagesize, use_cache=False):
9c44d242
PH
1919 self._pagefunc = pagefunc
1920 self._pagesize = pagesize
b95dc034
YCH
1921 self._use_cache = use_cache
1922 if use_cache:
1923 self._cache = {}
9c44d242 1924
b7ab0590
PH
1925 def getslice(self, start=0, end=None):
1926 res = []
1927 for pagenum in itertools.count(start // self._pagesize):
1928 firstid = pagenum * self._pagesize
1929 nextfirstid = pagenum * self._pagesize + self._pagesize
1930 if start >= nextfirstid:
1931 continue
1932
b95dc034
YCH
1933 page_results = None
1934 if self._use_cache:
1935 page_results = self._cache.get(pagenum)
1936 if page_results is None:
1937 page_results = list(self._pagefunc(pagenum))
1938 if self._use_cache:
1939 self._cache[pagenum] = page_results
b7ab0590
PH
1940
1941 startv = (
1942 start % self._pagesize
1943 if firstid <= start < nextfirstid
1944 else 0)
1945
1946 endv = (
1947 ((end - 1) % self._pagesize) + 1
1948 if (end is not None and firstid <= end <= nextfirstid)
1949 else None)
1950
1951 if startv != 0 or endv is not None:
1952 page_results = page_results[startv:endv]
1953 res.extend(page_results)
1954
1955 # A little optimization - if the current page is not "full", i.e. it does
1956 # not contain page_size videos, then we can assume that this page
1957 # is the last one - there are no more ids on further pages,
1958 # i.e. there is no need to query again.
1959 if len(page_results) + startv < self._pagesize:
1960 break
1961
1962 # If we got the whole page, but the next page is not interesting,
1963 # break out early as well
1964 if end == nextfirstid:
1965 break
1966 return res
81c2f20b
PH
1967
1968
9c44d242
PH
1969class InAdvancePagedList(PagedList):
1970 def __init__(self, pagefunc, pagecount, pagesize):
1971 self._pagefunc = pagefunc
1972 self._pagecount = pagecount
1973 self._pagesize = pagesize
1974
1975 def getslice(self, start=0, end=None):
1976 res = []
1977 start_page = start // self._pagesize
1978 end_page = (
1979 self._pagecount if end is None else (end // self._pagesize + 1))
1980 skip_elems = start - start_page * self._pagesize
1981 only_more = None if end is None else end - start
1982 for pagenum in range(start_page, end_page):
1983 page = list(self._pagefunc(pagenum))
1984 if skip_elems:
1985 page = page[skip_elems:]
1986 skip_elems = None
1987 if only_more is not None:
1988 if len(page) < only_more:
1989 only_more -= len(page)
1990 else:
1991 page = page[:only_more]
1992 res.extend(page)
1993 break
1994 res.extend(page)
1995 return res
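# Illustrative usage of OnDemandPagedList (a sketch; page_func is a made-up
# callback that would normally fetch one page of results):
#   >>> def page_func(pagenum):
#   ...     return range(pagenum * 10, (pagenum + 1) * 10)
#   >>> OnDemandPagedList(page_func, 10).getslice(5, 8)
#   [5, 6, 7]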
1996
1997
81c2f20b 1998def uppercase_escape(s):
676eb3f2 1999 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 2000 return re.sub(
a612753d 2001 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
2002 lambda m: unicode_escape(m.group(0))[0],
2003 s)
0fe2ff78
YCH
2004
2005
2006def lowercase_escape(s):
2007 unicode_escape = codecs.getdecoder('unicode_escape')
2008 return re.sub(
2009 r'\\u[0-9a-fA-F]{4}',
2010 lambda m: unicode_escape(m.group(0))[0],
2011 s)
b53466e1 2012
d05cfe06
S
2013
2014def escape_rfc3986(s):
2015 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 2016 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 2017 s = s.encode('utf-8')
ecc0c5ee 2018 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
2019
2020
2021def escape_url(url):
2022 """Escape URL as suggested by RFC 3986"""
2023 url_parsed = compat_urllib_parse_urlparse(url)
2024 return url_parsed._replace(
efbed08d 2025 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
2026 path=escape_rfc3986(url_parsed.path),
2027 params=escape_rfc3986(url_parsed.params),
2028 query=escape_rfc3986(url_parsed.query),
2029 fragment=escape_rfc3986(url_parsed.fragment)
2030 ).geturl()
2031
62e609ab
PH
2032
2033def read_batch_urls(batch_fd):
2034 def fixup(url):
2035 if not isinstance(url, compat_str):
2036 url = url.decode('utf-8', 'replace')
28e614de 2037 BOM_UTF8 = '\xef\xbb\xbf'
62e609ab
PH
2038 if url.startswith(BOM_UTF8):
2039 url = url[len(BOM_UTF8):]
2040 url = url.strip()
2041 if url.startswith(('#', ';', ']')):
2042 return False
2043 return url
2044
2045 with contextlib.closing(batch_fd) as fd:
2046 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
2047
2048
2049def urlencode_postdata(*args, **kargs):
15707c7e 2050 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
2051
2052
38f9ef31 2053def update_url_query(url, query):
cacd9966
YCH
2054 if not query:
2055 return url
38f9ef31 2056 parsed_url = compat_urlparse.urlparse(url)
2057 qs = compat_parse_qs(parsed_url.query)
2058 qs.update(query)
2059 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 2060 query=compat_urllib_parse_urlencode(qs, True)))
16392824 2061
8e60dc75 2062
ed0291d1
S
2063def update_Request(req, url=None, data=None, headers={}, query={}):
2064 req_headers = req.headers.copy()
2065 req_headers.update(headers)
2066 req_data = data or req.data
2067 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
2068 req_get_method = req.get_method()
2069 if req_get_method == 'HEAD':
2070 req_type = HEADRequest
2071 elif req_get_method == 'PUT':
2072 req_type = PUTRequest
2073 else:
2074 req_type = compat_urllib_request.Request
ed0291d1
S
2075 new_req = req_type(
2076 req_url, data=req_data, headers=req_headers,
2077 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
2078 if hasattr(req, 'timeout'):
2079 new_req.timeout = req.timeout
2080 return new_req
2081
2082
86296ad2 2083def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
2084 if isinstance(key_or_keys, (list, tuple)):
2085 for key in key_or_keys:
86296ad2
S
2086 if key not in d or d[key] is None or skip_false_values and not d[key]:
2087 continue
2088 return d[key]
cbecc9b9
S
2089 return default
2090 return d.get(key_or_keys, default)
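# Illustrative values for dict_get() (a sketch; None and falsy values are
# skipped when a list of keys is given):
#   >>> dict_get({'a': None, 'b': '', 'c': 'x'}, ('a', 'b', 'c'))
#   'x'
#   >>> dict_get({'a': None, 'b': ''}, ('a', 'b'), default='fallback')
#   'fallback'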
2091
2092
329ca3be
S
2093def try_get(src, getter, expected_type=None):
2094 try:
2095 v = getter(src)
2096 except (AttributeError, KeyError, TypeError, IndexError):
2097 pass
2098 else:
2099 if expected_type is None or isinstance(v, expected_type):
2100 return v
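# Illustrative values for try_get() (a sketch):
#   >>> try_get({'a': ['x']}, lambda d: d['a'][0], compat_str)
#   'x'
#   >>> try_get({'a': []}, lambda d: d['a'][0])  # returns None, the IndexError is swallowed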
2101
2102
8e60dc75
S
2103def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
2104 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
2105
16392824 2106
a1a530b0
PH
2107US_RATINGS = {
2108 'G': 0,
2109 'PG': 10,
2110 'PG-13': 13,
2111 'R': 16,
2112 'NC': 18,
2113}
fac55558
PH
2114
2115
a8795327
S
2116TV_PARENTAL_GUIDELINES = {
2117 'TV-Y': 0,
2118 'TV-Y7': 7,
2119 'TV-G': 0,
2120 'TV-PG': 0,
2121 'TV-14': 14,
2122 'TV-MA': 17,
2123}
2124
2125
146c80e2 2126def parse_age_limit(s):
a8795327
S
2127 if type(s) == int:
2128 return s if 0 <= s <= 21 else None
2129 if not isinstance(s, compat_basestring):
d838b1bd 2130 return None
146c80e2 2131 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
2132 if m:
2133 return int(m.group('age'))
2134 if s in US_RATINGS:
2135 return US_RATINGS[s]
2136 return TV_PARENTAL_GUIDELINES.get(s)
146c80e2
S
2137
2138
fac55558 2139def strip_jsonp(code):
609a61e3 2140 return re.sub(
5950cb1d 2141 r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
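# Illustrative value for strip_jsonp() (a sketch with a made-up callback name):
#   >>> strip_jsonp('callback({"status": "ok"});')
#   '{"status": "ok"}'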
478c2c61
PH
2142
2143
e05f6939 2144def js_to_json(code):
4195096e
S
2145 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
2146 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
2147 INTEGER_TABLE = (
2148 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
2149 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
2150 )
2151
e05f6939 2152 def fix_kv(m):
e7b6d122
PH
2153 v = m.group(0)
2154 if v in ('true', 'false', 'null'):
2155 return v
b3ee552e 2156 elif v.startswith('/*') or v.startswith('//') or v == ',':
bd1e4844 2157 return ""
2158
2159 if v[0] in ("'", '"'):
2160 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 2161 '"': '\\"',
bd1e4844 2162 "\\'": "'",
2163 '\\\n': '',
2164 '\\x': '\\u00',
2165 }.get(m.group(0), m.group(0)), v[1:-1])
2166
89ac4a19
S
2167 for regex, base in INTEGER_TABLE:
2168 im = re.match(regex, v)
2169 if im:
e4659b45 2170 i = int(im.group(1), base)
89ac4a19
S
2171 return '"%d":' % i if v.endswith(':') else '%d' % i
2172
e7b6d122 2173 return '"%s"' % v
e05f6939 2174
bd1e4844 2175 return re.sub(r'''(?sx)
2176 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
2177 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 2178 {comment}|,(?={skip}[\]}}])|
bd1e4844 2179 [a-zA-Z_][.a-zA-Z_0-9]*|
4195096e
S
2180 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
2181 [0-9]+(?={skip}:)
2182 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
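# Illustrative value for js_to_json() (a sketch; bare keys and single-quoted
# strings are converted to valid JSON):
#   >>> js_to_json("{abc: 'def', ghi: true}")
#   '{"abc": "def", "ghi": true}'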
e05f6939
PH
2183
2184
478c2c61
PH
2185def qualities(quality_ids):
2186 """ Get a numeric quality value out of a list of possible values """
2187 def q(qid):
2188 try:
2189 return quality_ids.index(qid)
2190 except ValueError:
2191 return -1
2192 return q
2193
acd69589
PH
2194
2195DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 2196
a020a0dc
PH
2197
2198def limit_length(s, length):
2199 """ Add ellipses to overly long strings """
2200 if s is None:
2201 return None
2202 ELLIPSES = '...'
2203 if len(s) > length:
2204 return s[:length - len(ELLIPSES)] + ELLIPSES
2205 return s
48844745
PH
2206
2207
2208def version_tuple(v):
5f9b8394 2209 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
2210
2211
2212def is_outdated_version(version, limit, assume_new=True):
2213 if not version:
2214 return not assume_new
2215 try:
2216 return version_tuple(version) < version_tuple(limit)
2217 except ValueError:
2218 return not assume_new
732ea2f0
PH
2219
2220
2221def ytdl_is_updateable():
2222 """ Returns True if youtube-dl can be updated with -U """
2223 from zipimport import zipimporter
2224
2225 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
2226
2227
2228def args_to_str(args):
2229 # Get a short string representation for a subprocess command
702ccf2d 2230 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
2231
2232
9b9c5355 2233def error_to_compat_str(err):
fdae2358
S
2234 err_str = str(err)
2235 # On Python 2, the error byte string must be decoded with the proper
2236 # encoding rather than ASCII
2237 if sys.version_info[0] < 3:
2238 err_str = err_str.decode(preferredencoding())
2239 return err_str
2240
2241
c460bdd5 2242def mimetype2ext(mt):
eb9ee194
S
2243 if mt is None:
2244 return None
2245
765ac263
JMF
2246 ext = {
2247 'audio/mp4': 'm4a',
6c33d24b
YCH
2248 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
2249 # as it's the most popular one
2250 'audio/mpeg': 'mp3',
765ac263
JMF
2251 }.get(mt)
2252 if ext is not None:
2253 return ext
2254
c460bdd5 2255 _, _, res = mt.rpartition('/')
6562d34a 2256 res = res.split(';')[0].strip().lower()
c460bdd5
PH
2257
2258 return {
f6861ec9 2259 '3gpp': '3gp',
cafcf657 2260 'smptett+xml': 'tt',
2261 'srt': 'srt',
2262 'ttaf+xml': 'dfxp',
a0d8d704 2263 'ttml+xml': 'ttml',
cafcf657 2264 'vtt': 'vtt',
f6861ec9 2265 'x-flv': 'flv',
a0d8d704
YCH
2266 'x-mp4-fragmented': 'mp4',
2267 'x-ms-wmv': 'wmv',
b4173f15
RA
2268 'mpegurl': 'm3u8',
2269 'x-mpegurl': 'm3u8',
2270 'vnd.apple.mpegurl': 'm3u8',
2271 'dash+xml': 'mpd',
2272 'f4m': 'f4m',
2273 'f4m+xml': 'f4m',
f164b971 2274 'hds+xml': 'f4m',
e910fe2f 2275 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 2276 'quicktime': 'mov',
c460bdd5
PH
2277 }.get(res, res)
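# Illustrative values for mimetype2ext() (a sketch):
#   >>> mimetype2ext('audio/mp4')
#   'm4a'
#   >>> mimetype2ext('application/x-mpegURL')
#   'm3u8'
#   >>> mimetype2ext('video/webm; codecs="vp9"')
#   'webm'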
2278
2279
4f3c5e06 2280def parse_codecs(codecs_str):
2281 # http://tools.ietf.org/html/rfc6381
2282 if not codecs_str:
2283 return {}
2284 split_codecs = list(filter(None, map(
2285 lambda s: s.strip(), codecs_str.strip().strip(',').split(','))))
2286 vcodec, acodec = None, None
2287 for full_codec in split_codecs:
2288 codec = full_codec.split('.')[0]
2289 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
2290 if not vcodec:
2291 vcodec = full_codec
073ac122 2292 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3'):
4f3c5e06 2293 if not acodec:
2294 acodec = full_codec
2295 else:
2296 write_string('WARNING: Unknown codec %s' % full_codec, sys.stderr)
2297 if not vcodec and not acodec:
2298 if len(split_codecs) == 2:
2299 return {
2300 'vcodec': vcodec,
2301 'acodec': acodec,
2302 }
2303 elif len(split_codecs) == 1:
2304 return {
2305 'vcodec': 'none',
2306 'acodec': vcodec,
2307 }
2308 else:
2309 return {
2310 'vcodec': vcodec or 'none',
2311 'acodec': acodec or 'none',
2312 }
2313 return {}
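# Illustrative value for parse_codecs() (a sketch; the codecs string is a
# made-up DASH/HLS-style example and the key order of the result may vary):
#   >>> parse_codecs('avc1.64001f, mp4a.40.2')
#   {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}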
2314
2315
2ccd1b10 2316def urlhandle_detect_ext(url_handle):
79298173 2317 getheader = url_handle.headers.get
2ccd1b10 2318
b55ee18f
PH
2319 cd = getheader('Content-Disposition')
2320 if cd:
2321 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
2322 if m:
2323 e = determine_ext(m.group('filename'), default_ext=None)
2324 if e:
2325 return e
2326
c460bdd5 2327 return mimetype2ext(getheader('Content-Type'))
05900629
PH
2328
2329
1e399778
YCH
2330def encode_data_uri(data, mime_type):
2331 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
2332
2333
05900629 2334def age_restricted(content_limit, age_limit):
6ec6cb4e 2335 """ Returns True iff the content should be blocked """
05900629
PH
2336
2337 if age_limit is None: # No limit set
2338 return False
2339 if content_limit is None:
2340 return False # Content available for everyone
2341 return age_limit < content_limit
61ca9a80
PH
2342
2343
2344def is_html(first_bytes):
2345 """ Detect whether a file contains HTML by examining its first bytes. """
2346
2347 BOMS = [
2348 (b'\xef\xbb\xbf', 'utf-8'),
2349 (b'\x00\x00\xfe\xff', 'utf-32-be'),
2350 (b'\xff\xfe\x00\x00', 'utf-32-le'),
2351 (b'\xff\xfe', 'utf-16-le'),
2352 (b'\xfe\xff', 'utf-16-be'),
2353 ]
2354 for bom, enc in BOMS:
2355 if first_bytes.startswith(bom):
2356 s = first_bytes[len(bom):].decode(enc, 'replace')
2357 break
2358 else:
2359 s = first_bytes.decode('utf-8', 'replace')
2360
2361 return re.match(r'^\s*<', s)
a055469f
PH
2362
2363
2364def determine_protocol(info_dict):
2365 protocol = info_dict.get('protocol')
2366 if protocol is not None:
2367 return protocol
2368
2369 url = info_dict['url']
2370 if url.startswith('rtmp'):
2371 return 'rtmp'
2372 elif url.startswith('mms'):
2373 return 'mms'
2374 elif url.startswith('rtsp'):
2375 return 'rtsp'
2376
2377 ext = determine_ext(url)
2378 if ext == 'm3u8':
2379 return 'm3u8'
2380 elif ext == 'f4m':
2381 return 'f4m'
2382
2383 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
2384
2385
2386def render_table(header_row, data):
2387 """ Render a list of rows, each as a list of values """
2388 table = [header_row] + data
2389 max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
2390 format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
2391 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
2392
2393
2394def _match_one(filter_part, dct):
2395 COMPARISON_OPERATORS = {
2396 '<': operator.lt,
2397 '<=': operator.le,
2398 '>': operator.gt,
2399 '>=': operator.ge,
2400 '=': operator.eq,
2401 '!=': operator.ne,
2402 }
2403 operator_rex = re.compile(r'''(?x)\s*
2404 (?P<key>[a-z_]+)
2405 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2406 (?:
2407 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 2408 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
2409 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
2410 )
2411 \s*$
2412 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
2413 m = operator_rex.search(filter_part)
2414 if m:
2415 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 2416 actual_value = dct.get(m.group('key'))
db13c16e
S
2417 if (m.group('quotedstrval') is not None or
2418 m.group('strval') is not None or
e5a088dc
S
2419 # If the original field is a string and the matching comparison value is
2420 # a number, we should respect the origin of the original field
2421 # and process the comparison value as a string (see
2422 # https://github.com/rg3/youtube-dl/issues/11082).
2423 actual_value is not None and m.group('intval') is not None and
2424 isinstance(actual_value, compat_str)):
347de493
PH
2425 if m.group('op') not in ('=', '!='):
2426 raise ValueError(
2427 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
2428 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
2429 quote = m.group('quote')
2430 if quote is not None:
2431 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
2432 else:
2433 try:
2434 comparison_value = int(m.group('intval'))
2435 except ValueError:
2436 comparison_value = parse_filesize(m.group('intval'))
2437 if comparison_value is None:
2438 comparison_value = parse_filesize(m.group('intval') + 'B')
2439 if comparison_value is None:
2440 raise ValueError(
2441 'Invalid integer value %r in filter part %r' % (
2442 m.group('intval'), filter_part))
347de493
PH
2443 if actual_value is None:
2444 return m.group('none_inclusive')
2445 return op(actual_value, comparison_value)
2446
2447 UNARY_OPERATORS = {
2448 '': lambda v: v is not None,
2449 '!': lambda v: v is None,
2450 }
2451 operator_rex = re.compile(r'''(?x)\s*
2452 (?P<op>%s)\s*(?P<key>[a-z_]+)
2453 \s*$
2454 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
2455 m = operator_rex.search(filter_part)
2456 if m:
2457 op = UNARY_OPERATORS[m.group('op')]
2458 actual_value = dct.get(m.group('key'))
2459 return op(actual_value)
2460
2461 raise ValueError('Invalid filter part %r' % filter_part)
2462
2463
2464def match_str(filter_str, dct):
2465 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False """
2466
2467 return all(
2468 _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
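# Illustrative usage of match_str() (a sketch; the '?' suffix lets a
# comparison pass when the field is missing):
#   >>> match_str('like_count > 100 & dislike_count <? 50',
#   ...           {'like_count': 190, 'dislike_count': 10})
#   True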
2469
2470
2471def match_filter_func(filter_str):
2472 def _match_func(info_dict):
2473 if match_str(filter_str, info_dict):
2474 return None
2475 else:
2476 video_title = info_dict.get('title', info_dict.get('id', 'video'))
2477 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
2478 return _match_func
91410c9b
PH
2479
2480
bf6427d2
YCH
2481def parse_dfxp_time_expr(time_expr):
2482 if not time_expr:
d631d5f9 2483 return
bf6427d2
YCH
2484
2485 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
2486 if mobj:
2487 return float(mobj.group('time_offset'))
2488
db2fe38b 2489 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 2490 if mobj:
db2fe38b 2491 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
2492
2493
c1c924ab
YCH
2494def srt_subtitles_timecode(seconds):
2495 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
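# Illustrative values for the subtitle time helpers above (a sketch):
#   >>> parse_dfxp_time_expr('00:01:02.5')
#   62.5
#   >>> srt_subtitles_timecode(3723.5)
#   '01:02:03,500'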
bf6427d2
YCH
2496
2497
2498def dfxp2srt(dfxp_data):
4e335771
YCH
2499 _x = functools.partial(xpath_with_ns, ns_map={
2500 'ttml': 'http://www.w3.org/ns/ttml',
2501 'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
5bf28d78 2502 'ttaf1_0604': 'http://www.w3.org/2006/04/ttaf1',
4e335771 2503 })
bf6427d2 2504
87de7069 2505 class TTMLPElementParser(object):
2b14cb56 2506 out = ''
bf6427d2 2507
2b14cb56 2508 def start(self, tag, attrib):
2509 if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
2510 self.out += '\n'
bf6427d2 2511
2b14cb56 2512 def end(self, tag):
2513 pass
bf6427d2 2514
2b14cb56 2515 def data(self, data):
2516 self.out += data
2517
2518 def close(self):
2519 return self.out.strip()
2520
2521 def parse_node(node):
2522 target = TTMLPElementParser()
2523 parser = xml.etree.ElementTree.XMLParser(target=target)
2524 parser.feed(xml.etree.ElementTree.tostring(node))
2525 return parser.close()
bf6427d2 2526
36e6f62c 2527 dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
bf6427d2 2528 out = []
5bf28d78 2529 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall(_x('.//ttaf1_0604:p')) or dfxp.findall('.//p')
1b0427e6
YCH
2530
2531 if not paras:
2532 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2
YCH
2533
2534 for para, index in zip(paras, itertools.count(1)):
d631d5f9 2535 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 2536 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
2537 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
2538 if begin_time is None:
2539 continue
7dff0363 2540 if not end_time:
d631d5f9
YCH
2541 if not dur:
2542 continue
2543 end_time = begin_time + dur
bf6427d2
YCH
2544 out.append('%d\n%s --> %s\n%s\n\n' % (
2545 index,
c1c924ab
YCH
2546 srt_subtitles_timecode(begin_time),
2547 srt_subtitles_timecode(end_time),
bf6427d2
YCH
2548 parse_node(para)))
2549
2550 return ''.join(out)
2551
2552
66e289ba
S
2553def cli_option(params, command_option, param):
2554 param = params.get(param)
98e698f1
RA
2555 if param:
2556 param = compat_str(param)
66e289ba
S
2557 return [command_option, param] if param is not None else []
2558
2559
2560def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
2561 param = params.get(param)
2562 assert isinstance(param, bool)
2563 if separator:
2564 return [command_option + separator + (true_value if param else false_value)]
2565 return [command_option, true_value if param else false_value]
2566
2567
2568def cli_valueless_option(params, command_option, param, expected_value=True):
2569 param = params.get(param)
2570 return [command_option] if param == expected_value else []
2571
2572
2573def cli_configuration_args(params, param, default=[]):
2574 ex_args = params.get(param)
2575 if ex_args is None:
2576 return default
2577 assert isinstance(ex_args, list)
2578 return ex_args
2579
2580
39672624
YCH
2581class ISO639Utils(object):
2582 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
2583 _lang_map = {
2584 'aa': 'aar',
2585 'ab': 'abk',
2586 'ae': 'ave',
2587 'af': 'afr',
2588 'ak': 'aka',
2589 'am': 'amh',
2590 'an': 'arg',
2591 'ar': 'ara',
2592 'as': 'asm',
2593 'av': 'ava',
2594 'ay': 'aym',
2595 'az': 'aze',
2596 'ba': 'bak',
2597 'be': 'bel',
2598 'bg': 'bul',
2599 'bh': 'bih',
2600 'bi': 'bis',
2601 'bm': 'bam',
2602 'bn': 'ben',
2603 'bo': 'bod',
2604 'br': 'bre',
2605 'bs': 'bos',
2606 'ca': 'cat',
2607 'ce': 'che',
2608 'ch': 'cha',
2609 'co': 'cos',
2610 'cr': 'cre',
2611 'cs': 'ces',
2612 'cu': 'chu',
2613 'cv': 'chv',
2614 'cy': 'cym',
2615 'da': 'dan',
2616 'de': 'deu',
2617 'dv': 'div',
2618 'dz': 'dzo',
2619 'ee': 'ewe',
2620 'el': 'ell',
2621 'en': 'eng',
2622 'eo': 'epo',
2623 'es': 'spa',
2624 'et': 'est',
2625 'eu': 'eus',
2626 'fa': 'fas',
2627 'ff': 'ful',
2628 'fi': 'fin',
2629 'fj': 'fij',
2630 'fo': 'fao',
2631 'fr': 'fra',
2632 'fy': 'fry',
2633 'ga': 'gle',
2634 'gd': 'gla',
2635 'gl': 'glg',
2636 'gn': 'grn',
2637 'gu': 'guj',
2638 'gv': 'glv',
2639 'ha': 'hau',
2640 'he': 'heb',
2641 'hi': 'hin',
2642 'ho': 'hmo',
2643 'hr': 'hrv',
2644 'ht': 'hat',
2645 'hu': 'hun',
2646 'hy': 'hye',
2647 'hz': 'her',
2648 'ia': 'ina',
2649 'id': 'ind',
2650 'ie': 'ile',
2651 'ig': 'ibo',
2652 'ii': 'iii',
2653 'ik': 'ipk',
2654 'io': 'ido',
2655 'is': 'isl',
2656 'it': 'ita',
2657 'iu': 'iku',
2658 'ja': 'jpn',
2659 'jv': 'jav',
2660 'ka': 'kat',
2661 'kg': 'kon',
2662 'ki': 'kik',
2663 'kj': 'kua',
2664 'kk': 'kaz',
2665 'kl': 'kal',
2666 'km': 'khm',
2667 'kn': 'kan',
2668 'ko': 'kor',
2669 'kr': 'kau',
2670 'ks': 'kas',
2671 'ku': 'kur',
2672 'kv': 'kom',
2673 'kw': 'cor',
2674 'ky': 'kir',
2675 'la': 'lat',
2676 'lb': 'ltz',
2677 'lg': 'lug',
2678 'li': 'lim',
2679 'ln': 'lin',
2680 'lo': 'lao',
2681 'lt': 'lit',
2682 'lu': 'lub',
2683 'lv': 'lav',
2684 'mg': 'mlg',
2685 'mh': 'mah',
2686 'mi': 'mri',
2687 'mk': 'mkd',
2688 'ml': 'mal',
2689 'mn': 'mon',
2690 'mr': 'mar',
2691 'ms': 'msa',
2692 'mt': 'mlt',
2693 'my': 'mya',
2694 'na': 'nau',
2695 'nb': 'nob',
2696 'nd': 'nde',
2697 'ne': 'nep',
2698 'ng': 'ndo',
2699 'nl': 'nld',
2700 'nn': 'nno',
2701 'no': 'nor',
2702 'nr': 'nbl',
2703 'nv': 'nav',
2704 'ny': 'nya',
2705 'oc': 'oci',
2706 'oj': 'oji',
2707 'om': 'orm',
2708 'or': 'ori',
2709 'os': 'oss',
2710 'pa': 'pan',
2711 'pi': 'pli',
2712 'pl': 'pol',
2713 'ps': 'pus',
2714 'pt': 'por',
2715 'qu': 'que',
2716 'rm': 'roh',
2717 'rn': 'run',
2718 'ro': 'ron',
2719 'ru': 'rus',
2720 'rw': 'kin',
2721 'sa': 'san',
2722 'sc': 'srd',
2723 'sd': 'snd',
2724 'se': 'sme',
2725 'sg': 'sag',
2726 'si': 'sin',
2727 'sk': 'slk',
2728 'sl': 'slv',
2729 'sm': 'smo',
2730 'sn': 'sna',
2731 'so': 'som',
2732 'sq': 'sqi',
2733 'sr': 'srp',
2734 'ss': 'ssw',
2735 'st': 'sot',
2736 'su': 'sun',
2737 'sv': 'swe',
2738 'sw': 'swa',
2739 'ta': 'tam',
2740 'te': 'tel',
2741 'tg': 'tgk',
2742 'th': 'tha',
2743 'ti': 'tir',
2744 'tk': 'tuk',
2745 'tl': 'tgl',
2746 'tn': 'tsn',
2747 'to': 'ton',
2748 'tr': 'tur',
2749 'ts': 'tso',
2750 'tt': 'tat',
2751 'tw': 'twi',
2752 'ty': 'tah',
2753 'ug': 'uig',
2754 'uk': 'ukr',
2755 'ur': 'urd',
2756 'uz': 'uzb',
2757 've': 'ven',
2758 'vi': 'vie',
2759 'vo': 'vol',
2760 'wa': 'wln',
2761 'wo': 'wol',
2762 'xh': 'xho',
2763 'yi': 'yid',
2764 'yo': 'yor',
2765 'za': 'zha',
2766 'zh': 'zho',
2767 'zu': 'zul',
2768 }
2769
2770 @classmethod
2771 def short2long(cls, code):
2772 """Convert language code from ISO 639-1 to ISO 639-2/T"""
2773 return cls._lang_map.get(code[:2])
2774
2775 @classmethod
2776 def long2short(cls, code):
2777 """Convert language code from ISO 639-2/T to ISO 639-1"""
2778 for short_name, long_name in cls._lang_map.items():
2779 if long_name == code:
2780 return short_name
2781
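# Illustrative values for ISO639Utils (a sketch):
#   >>> ISO639Utils.short2long('en')
#   'eng'
#   >>> ISO639Utils.long2short('fra')
#   'fr'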
2782
4eb10f66
YCH
2783class ISO3166Utils(object):
2784 # From http://data.okfn.org/data/core/country-list
2785 _country_map = {
2786 'AF': 'Afghanistan',
2787 'AX': 'Åland Islands',
2788 'AL': 'Albania',
2789 'DZ': 'Algeria',
2790 'AS': 'American Samoa',
2791 'AD': 'Andorra',
2792 'AO': 'Angola',
2793 'AI': 'Anguilla',
2794 'AQ': 'Antarctica',
2795 'AG': 'Antigua and Barbuda',
2796 'AR': 'Argentina',
2797 'AM': 'Armenia',
2798 'AW': 'Aruba',
2799 'AU': 'Australia',
2800 'AT': 'Austria',
2801 'AZ': 'Azerbaijan',
2802 'BS': 'Bahamas',
2803 'BH': 'Bahrain',
2804 'BD': 'Bangladesh',
2805 'BB': 'Barbados',
2806 'BY': 'Belarus',
2807 'BE': 'Belgium',
2808 'BZ': 'Belize',
2809 'BJ': 'Benin',
2810 'BM': 'Bermuda',
2811 'BT': 'Bhutan',
2812 'BO': 'Bolivia, Plurinational State of',
2813 'BQ': 'Bonaire, Sint Eustatius and Saba',
2814 'BA': 'Bosnia and Herzegovina',
2815 'BW': 'Botswana',
2816 'BV': 'Bouvet Island',
2817 'BR': 'Brazil',
2818 'IO': 'British Indian Ocean Territory',
2819 'BN': 'Brunei Darussalam',
2820 'BG': 'Bulgaria',
2821 'BF': 'Burkina Faso',
2822 'BI': 'Burundi',
2823 'KH': 'Cambodia',
2824 'CM': 'Cameroon',
2825 'CA': 'Canada',
2826 'CV': 'Cape Verde',
2827 'KY': 'Cayman Islands',
2828 'CF': 'Central African Republic',
2829 'TD': 'Chad',
2830 'CL': 'Chile',
2831 'CN': 'China',
2832 'CX': 'Christmas Island',
2833 'CC': 'Cocos (Keeling) Islands',
2834 'CO': 'Colombia',
2835 'KM': 'Comoros',
2836 'CG': 'Congo',
2837 'CD': 'Congo, the Democratic Republic of the',
2838 'CK': 'Cook Islands',
2839 'CR': 'Costa Rica',
2840 'CI': 'Côte d\'Ivoire',
2841 'HR': 'Croatia',
2842 'CU': 'Cuba',
2843 'CW': 'Curaçao',
2844 'CY': 'Cyprus',
2845 'CZ': 'Czech Republic',
2846 'DK': 'Denmark',
2847 'DJ': 'Djibouti',
2848 'DM': 'Dominica',
2849 'DO': 'Dominican Republic',
2850 'EC': 'Ecuador',
2851 'EG': 'Egypt',
2852 'SV': 'El Salvador',
2853 'GQ': 'Equatorial Guinea',
2854 'ER': 'Eritrea',
2855 'EE': 'Estonia',
2856 'ET': 'Ethiopia',
2857 'FK': 'Falkland Islands (Malvinas)',
2858 'FO': 'Faroe Islands',
2859 'FJ': 'Fiji',
2860 'FI': 'Finland',
2861 'FR': 'France',
2862 'GF': 'French Guiana',
2863 'PF': 'French Polynesia',
2864 'TF': 'French Southern Territories',
2865 'GA': 'Gabon',
2866 'GM': 'Gambia',
2867 'GE': 'Georgia',
2868 'DE': 'Germany',
2869 'GH': 'Ghana',
2870 'GI': 'Gibraltar',
2871 'GR': 'Greece',
2872 'GL': 'Greenland',
2873 'GD': 'Grenada',
2874 'GP': 'Guadeloupe',
2875 'GU': 'Guam',
2876 'GT': 'Guatemala',
2877 'GG': 'Guernsey',
2878 'GN': 'Guinea',
2879 'GW': 'Guinea-Bissau',
2880 'GY': 'Guyana',
2881 'HT': 'Haiti',
2882 'HM': 'Heard Island and McDonald Islands',
2883 'VA': 'Holy See (Vatican City State)',
2884 'HN': 'Honduras',
2885 'HK': 'Hong Kong',
2886 'HU': 'Hungary',
2887 'IS': 'Iceland',
2888 'IN': 'India',
2889 'ID': 'Indonesia',
2890 'IR': 'Iran, Islamic Republic of',
2891 'IQ': 'Iraq',
2892 'IE': 'Ireland',
2893 'IM': 'Isle of Man',
2894 'IL': 'Israel',
2895 'IT': 'Italy',
2896 'JM': 'Jamaica',
2897 'JP': 'Japan',
2898 'JE': 'Jersey',
2899 'JO': 'Jordan',
2900 'KZ': 'Kazakhstan',
2901 'KE': 'Kenya',
2902 'KI': 'Kiribati',
2903 'KP': 'Korea, Democratic People\'s Republic of',
2904 'KR': 'Korea, Republic of',
2905 'KW': 'Kuwait',
2906 'KG': 'Kyrgyzstan',
2907 'LA': 'Lao People\'s Democratic Republic',
2908 'LV': 'Latvia',
2909 'LB': 'Lebanon',
2910 'LS': 'Lesotho',
2911 'LR': 'Liberia',
2912 'LY': 'Libya',
2913 'LI': 'Liechtenstein',
2914 'LT': 'Lithuania',
2915 'LU': 'Luxembourg',
2916 'MO': 'Macao',
2917 'MK': 'Macedonia, the Former Yugoslav Republic of',
2918 'MG': 'Madagascar',
2919 'MW': 'Malawi',
2920 'MY': 'Malaysia',
2921 'MV': 'Maldives',
2922 'ML': 'Mali',
2923 'MT': 'Malta',
2924 'MH': 'Marshall Islands',
2925 'MQ': 'Martinique',
2926 'MR': 'Mauritania',
2927 'MU': 'Mauritius',
2928 'YT': 'Mayotte',
2929 'MX': 'Mexico',
2930 'FM': 'Micronesia, Federated States of',
2931 'MD': 'Moldova, Republic of',
2932 'MC': 'Monaco',
2933 'MN': 'Mongolia',
2934 'ME': 'Montenegro',
2935 'MS': 'Montserrat',
2936 'MA': 'Morocco',
2937 'MZ': 'Mozambique',
2938 'MM': 'Myanmar',
2939 'NA': 'Namibia',
2940 'NR': 'Nauru',
2941 'NP': 'Nepal',
2942 'NL': 'Netherlands',
2943 'NC': 'New Caledonia',
2944 'NZ': 'New Zealand',
2945 'NI': 'Nicaragua',
2946 'NE': 'Niger',
2947 'NG': 'Nigeria',
2948 'NU': 'Niue',
2949 'NF': 'Norfolk Island',
2950 'MP': 'Northern Mariana Islands',
2951 'NO': 'Norway',
2952 'OM': 'Oman',
2953 'PK': 'Pakistan',
2954 'PW': 'Palau',
2955 'PS': 'Palestine, State of',
2956 'PA': 'Panama',
2957 'PG': 'Papua New Guinea',
2958 'PY': 'Paraguay',
2959 'PE': 'Peru',
2960 'PH': 'Philippines',
2961 'PN': 'Pitcairn',
2962 'PL': 'Poland',
2963 'PT': 'Portugal',
2964 'PR': 'Puerto Rico',
2965 'QA': 'Qatar',
2966 'RE': 'Réunion',
2967 'RO': 'Romania',
2968 'RU': 'Russian Federation',
2969 'RW': 'Rwanda',
2970 'BL': 'Saint Barthélemy',
2971 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
2972 'KN': 'Saint Kitts and Nevis',
2973 'LC': 'Saint Lucia',
2974 'MF': 'Saint Martin (French part)',
2975 'PM': 'Saint Pierre and Miquelon',
2976 'VC': 'Saint Vincent and the Grenadines',
2977 'WS': 'Samoa',
2978 'SM': 'San Marino',
2979 'ST': 'Sao Tome and Principe',
2980 'SA': 'Saudi Arabia',
2981 'SN': 'Senegal',
2982 'RS': 'Serbia',
2983 'SC': 'Seychelles',
2984 'SL': 'Sierra Leone',
2985 'SG': 'Singapore',
2986 'SX': 'Sint Maarten (Dutch part)',
2987 'SK': 'Slovakia',
2988 'SI': 'Slovenia',
2989 'SB': 'Solomon Islands',
2990 'SO': 'Somalia',
2991 'ZA': 'South Africa',
2992 'GS': 'South Georgia and the South Sandwich Islands',
2993 'SS': 'South Sudan',
2994 'ES': 'Spain',
2995 'LK': 'Sri Lanka',
2996 'SD': 'Sudan',
2997 'SR': 'Suriname',
2998 'SJ': 'Svalbard and Jan Mayen',
2999 'SZ': 'Swaziland',
3000 'SE': 'Sweden',
3001 'CH': 'Switzerland',
3002 'SY': 'Syrian Arab Republic',
3003 'TW': 'Taiwan, Province of China',
3004 'TJ': 'Tajikistan',
3005 'TZ': 'Tanzania, United Republic of',
3006 'TH': 'Thailand',
3007 'TL': 'Timor-Leste',
3008 'TG': 'Togo',
3009 'TK': 'Tokelau',
3010 'TO': 'Tonga',
3011 'TT': 'Trinidad and Tobago',
3012 'TN': 'Tunisia',
3013 'TR': 'Turkey',
3014 'TM': 'Turkmenistan',
3015 'TC': 'Turks and Caicos Islands',
3016 'TV': 'Tuvalu',
3017 'UG': 'Uganda',
3018 'UA': 'Ukraine',
3019 'AE': 'United Arab Emirates',
3020 'GB': 'United Kingdom',
3021 'US': 'United States',
3022 'UM': 'United States Minor Outlying Islands',
3023 'UY': 'Uruguay',
3024 'UZ': 'Uzbekistan',
3025 'VU': 'Vanuatu',
3026 'VE': 'Venezuela, Bolivarian Republic of',
3027 'VN': 'Viet Nam',
3028 'VG': 'Virgin Islands, British',
3029 'VI': 'Virgin Islands, U.S.',
3030 'WF': 'Wallis and Futuna',
3031 'EH': 'Western Sahara',
3032 'YE': 'Yemen',
3033 'ZM': 'Zambia',
3034 'ZW': 'Zimbabwe',
3035 }
3036
3037 @classmethod
3038 def short2full(cls, code):
3039 """Convert an ISO 3166-2 country code to the corresponding full name"""
3040 return cls._country_map.get(code.upper())
3041
3042
773f291d
S
3043class GeoUtils(object):
3044 # Major IPv4 address blocks per country
3045 _country_ip_map = {
3046 'AD': '85.94.160.0/19',
3047 'AE': '94.200.0.0/13',
3048 'AF': '149.54.0.0/17',
3049 'AG': '209.59.64.0/18',
3050 'AI': '204.14.248.0/21',
3051 'AL': '46.99.0.0/16',
3052 'AM': '46.70.0.0/15',
3053 'AO': '105.168.0.0/13',
3054 'AP': '159.117.192.0/21',
3055 'AR': '181.0.0.0/12',
3056 'AS': '202.70.112.0/20',
3057 'AT': '84.112.0.0/13',
3058 'AU': '1.128.0.0/11',
3059 'AW': '181.41.0.0/18',
3060 'AZ': '5.191.0.0/16',
3061 'BA': '31.176.128.0/17',
3062 'BB': '65.48.128.0/17',
3063 'BD': '114.130.0.0/16',
3064 'BE': '57.0.0.0/8',
3065 'BF': '129.45.128.0/17',
3066 'BG': '95.42.0.0/15',
3067 'BH': '37.131.0.0/17',
3068 'BI': '154.117.192.0/18',
3069 'BJ': '137.255.0.0/16',
3070 'BL': '192.131.134.0/24',
3071 'BM': '196.12.64.0/18',
3072 'BN': '156.31.0.0/16',
3073 'BO': '161.56.0.0/16',
3074 'BQ': '161.0.80.0/20',
3075 'BR': '152.240.0.0/12',
3076 'BS': '24.51.64.0/18',
3077 'BT': '119.2.96.0/19',
3078 'BW': '168.167.0.0/16',
3079 'BY': '178.120.0.0/13',
3080 'BZ': '179.42.192.0/18',
3081 'CA': '99.224.0.0/11',
3082 'CD': '41.243.0.0/16',
3083 'CF': '196.32.200.0/21',
3084 'CG': '197.214.128.0/17',
3085 'CH': '85.0.0.0/13',
3086 'CI': '154.232.0.0/14',
3087 'CK': '202.65.32.0/19',
3088 'CL': '152.172.0.0/14',
3089 'CM': '165.210.0.0/15',
3090 'CN': '36.128.0.0/10',
3091 'CO': '181.240.0.0/12',
3092 'CR': '201.192.0.0/12',
3093 'CU': '152.206.0.0/15',
3094 'CV': '165.90.96.0/19',
3095 'CW': '190.88.128.0/17',
3096 'CY': '46.198.0.0/15',
3097 'CZ': '88.100.0.0/14',
3098 'DE': '53.0.0.0/8',
3099 'DJ': '197.241.0.0/17',
3100 'DK': '87.48.0.0/12',
3101 'DM': '192.243.48.0/20',
3102 'DO': '152.166.0.0/15',
3103 'DZ': '41.96.0.0/12',
3104 'EC': '186.68.0.0/15',
3105 'EE': '90.190.0.0/15',
3106 'EG': '156.160.0.0/11',
3107 'ER': '196.200.96.0/20',
3108 'ES': '88.0.0.0/11',
3109 'ET': '196.188.0.0/14',
3110 'EU': '2.16.0.0/13',
3111 'FI': '91.152.0.0/13',
3112 'FJ': '144.120.0.0/16',
3113 'FM': '119.252.112.0/20',
3114 'FO': '88.85.32.0/19',
3115 'FR': '90.0.0.0/9',
3116 'GA': '41.158.0.0/15',
3117 'GB': '25.0.0.0/8',
3118 'GD': '74.122.88.0/21',
3119 'GE': '31.146.0.0/16',
3120 'GF': '161.22.64.0/18',
3121 'GG': '62.68.160.0/19',
3122 'GH': '45.208.0.0/14',
3123 'GI': '85.115.128.0/19',
3124 'GL': '88.83.0.0/19',
3125 'GM': '160.182.0.0/15',
3126 'GN': '197.149.192.0/18',
3127 'GP': '104.250.0.0/19',
3128 'GQ': '105.235.224.0/20',
3129 'GR': '94.64.0.0/13',
3130 'GT': '168.234.0.0/16',
3131 'GU': '168.123.0.0/16',
3132 'GW': '197.214.80.0/20',
3133 'GY': '181.41.64.0/18',
3134 'HK': '113.252.0.0/14',
3135 'HN': '181.210.0.0/16',
3136 'HR': '93.136.0.0/13',
3137 'HT': '148.102.128.0/17',
3138 'HU': '84.0.0.0/14',
3139 'ID': '39.192.0.0/10',
3140 'IE': '87.32.0.0/12',
3141 'IL': '79.176.0.0/13',
3142 'IM': '5.62.80.0/20',
3143 'IN': '117.192.0.0/10',
3144 'IO': '203.83.48.0/21',
3145 'IQ': '37.236.0.0/14',
3146 'IR': '2.176.0.0/12',
3147 'IS': '82.221.0.0/16',
3148 'IT': '79.0.0.0/10',
3149 'JE': '87.244.64.0/18',
3150 'JM': '72.27.0.0/17',
3151 'JO': '176.29.0.0/16',
3152 'JP': '126.0.0.0/8',
3153 'KE': '105.48.0.0/12',
3154 'KG': '158.181.128.0/17',
3155 'KH': '36.37.128.0/17',
3156 'KI': '103.25.140.0/22',
3157 'KM': '197.255.224.0/20',
3158 'KN': '198.32.32.0/19',
3159 'KP': '175.45.176.0/22',
3160 'KR': '175.192.0.0/10',
3161 'KW': '37.36.0.0/14',
3162 'KY': '64.96.0.0/15',
3163 'KZ': '2.72.0.0/13',
3164 'LA': '115.84.64.0/18',
3165 'LB': '178.135.0.0/16',
3166 'LC': '192.147.231.0/24',
3167 'LI': '82.117.0.0/19',
3168 'LK': '112.134.0.0/15',
3169 'LR': '41.86.0.0/19',
3170 'LS': '129.232.0.0/17',
3171 'LT': '78.56.0.0/13',
3172 'LU': '188.42.0.0/16',
3173 'LV': '46.109.0.0/16',
3174 'LY': '41.252.0.0/14',
3175 'MA': '105.128.0.0/11',
3176 'MC': '88.209.64.0/18',
3177 'MD': '37.246.0.0/16',
3178 'ME': '178.175.0.0/17',
3179 'MF': '74.112.232.0/21',
3180 'MG': '154.126.0.0/17',
3181 'MH': '117.103.88.0/21',
3182 'MK': '77.28.0.0/15',
3183 'ML': '154.118.128.0/18',
3184 'MM': '37.111.0.0/17',
3185 'MN': '49.0.128.0/17',
3186 'MO': '60.246.0.0/16',
3187 'MP': '202.88.64.0/20',
3188 'MQ': '109.203.224.0/19',
3189 'MR': '41.188.64.0/18',
3190 'MS': '208.90.112.0/22',
3191 'MT': '46.11.0.0/16',
3192 'MU': '105.16.0.0/12',
3193 'MV': '27.114.128.0/18',
3194 'MW': '105.234.0.0/16',
3195 'MX': '187.192.0.0/11',
3196 'MY': '175.136.0.0/13',
3197 'MZ': '197.218.0.0/15',
3198 'NA': '41.182.0.0/16',
3199 'NC': '101.101.0.0/18',
3200 'NE': '197.214.0.0/18',
3201 'NF': '203.17.240.0/22',
3202 'NG': '105.112.0.0/12',
3203 'NI': '186.76.0.0/15',
3204 'NL': '145.96.0.0/11',
3205 'NO': '84.208.0.0/13',
3206 'NP': '36.252.0.0/15',
3207 'NR': '203.98.224.0/19',
3208 'NU': '49.156.48.0/22',
3209 'NZ': '49.224.0.0/14',
3210 'OM': '5.36.0.0/15',
3211 'PA': '186.72.0.0/15',
3212 'PE': '186.160.0.0/14',
3213 'PF': '123.50.64.0/18',
3214 'PG': '124.240.192.0/19',
3215 'PH': '49.144.0.0/13',
3216 'PK': '39.32.0.0/11',
3217 'PL': '83.0.0.0/11',
3218 'PM': '70.36.0.0/20',
3219 'PR': '66.50.0.0/16',
3220 'PS': '188.161.0.0/16',
3221 'PT': '85.240.0.0/13',
3222 'PW': '202.124.224.0/20',
3223 'PY': '181.120.0.0/14',
3224 'QA': '37.210.0.0/15',
3225 'RE': '139.26.0.0/16',
3226 'RO': '79.112.0.0/13',
3227 'RS': '178.220.0.0/14',
3228 'RU': '5.136.0.0/13',
3229 'RW': '105.178.0.0/15',
3230 'SA': '188.48.0.0/13',
3231 'SB': '202.1.160.0/19',
3232 'SC': '154.192.0.0/11',
3233 'SD': '154.96.0.0/13',
3234 'SE': '78.64.0.0/12',
3235 'SG': '152.56.0.0/14',
3236 'SI': '188.196.0.0/14',
3237 'SK': '78.98.0.0/15',
3238 'SL': '197.215.0.0/17',
3239 'SM': '89.186.32.0/19',
3240 'SN': '41.82.0.0/15',
3241 'SO': '197.220.64.0/19',
3242 'SR': '186.179.128.0/17',
3243 'SS': '105.235.208.0/21',
3244 'ST': '197.159.160.0/19',
3245 'SV': '168.243.0.0/16',
3246 'SX': '190.102.0.0/20',
3247 'SY': '5.0.0.0/16',
3248 'SZ': '41.84.224.0/19',
3249 'TC': '65.255.48.0/20',
3250 'TD': '154.68.128.0/19',
3251 'TG': '196.168.0.0/14',
3252 'TH': '171.96.0.0/13',
3253 'TJ': '85.9.128.0/18',
3254 'TK': '27.96.24.0/21',
3255 'TL': '180.189.160.0/20',
3256 'TM': '95.85.96.0/19',
3257 'TN': '197.0.0.0/11',
3258 'TO': '175.176.144.0/21',
3259 'TR': '78.160.0.0/11',
3260 'TT': '186.44.0.0/15',
3261 'TV': '202.2.96.0/19',
3262 'TW': '120.96.0.0/11',
3263 'TZ': '156.156.0.0/14',
3264 'UA': '93.72.0.0/13',
3265 'UG': '154.224.0.0/13',
3266 'US': '3.0.0.0/8',
3267 'UY': '167.56.0.0/13',
3268 'UZ': '82.215.64.0/18',
3269 'VA': '212.77.0.0/19',
3270 'VC': '24.92.144.0/20',
3271 'VE': '186.88.0.0/13',
3272 'VG': '172.103.64.0/18',
3273 'VI': '146.226.0.0/16',
3274 'VN': '14.160.0.0/11',
3275 'VU': '202.80.32.0/20',
3276 'WF': '117.20.32.0/21',
3277 'WS': '202.4.32.0/19',
3278 'YE': '134.35.0.0/16',
3279 'YT': '41.242.116.0/22',
3280 'ZA': '41.0.0.0/11',
3281 'ZM': '165.56.0.0/13',
3282 'ZW': '41.85.192.0/19',
3283 }
3284
3285 @classmethod
3286 def random_ipv4(cls, code):
3287 block = cls._country_ip_map.get(code.upper())
3288 if not block:
3289 return None
3290 addr, preflen = block.split('/')
3291 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
3292 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa
S
3293 return compat_str(socket.inet_ntoa(
3294 compat_struct_pack('!I', random.randint(addr_min, addr_max))))
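# Illustrative usage of GeoUtils.random_ipv4() (a sketch; the address is
# random within the country's block, so only the prefix is predictable):
#   >>> GeoUtils.random_ipv4('DE').startswith('53.')
#   True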
773f291d
S
3295
3296
91410c9b 3297class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
3298 def __init__(self, proxies=None):
3299 # Set default handlers
3300 for type in ('http', 'https'):
3301 setattr(self, '%s_open' % type,
3302 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
3303 meth(r, proxy, type))
3304 return compat_urllib_request.ProxyHandler.__init__(self, proxies)
3305
91410c9b 3306 def proxy_open(self, req, proxy, type):
2461f79d 3307 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
3308 if req_proxy is not None:
3309 proxy = req_proxy
2461f79d
PH
3310 del req.headers['Ytdl-request-proxy']
3311
3312 if proxy == '__noproxy__':
3313 return None # No Proxy
51fb4995 3314 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188
YCH
3315 req.add_header('Ytdl-socks-proxy', proxy)
3316 # youtube-dl's http/https handlers take care of wrapping the socket with SOCKS
3317 return None
91410c9b
PH
3318 return compat_urllib_request.ProxyHandler.proxy_open(
3319 self, req, proxy, type)
5bc880b9
YCH
3320
3321
3322def ohdave_rsa_encrypt(data, exponent, modulus):
3323 '''
3324 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
3325
3326 Input:
3327 data: data to encrypt, bytes-like object
3328 exponent, modulus: parameters e and N of the RSA algorithm, both integers
3329 Output: hex string of encrypted data
3330
3331 Limitation: supports one block encryption only
3332 '''
3333
3334 payload = int(binascii.hexlify(data[::-1]), 16)
3335 encrypted = pow(payload, exponent, modulus)
3336 return '%x' % encrypted
81bdc8fd
YCH
3337
3338
5eb6bdce 3339def encode_base_n(num, n, table=None):
59f898b7 3340 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
3341 if not table:
3342 table = FULL_TABLE[:n]
3343
5eb6bdce
YCH
3344 if n > len(table):
3345 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
3346
3347 if num == 0:
3348 return table[0]
3349
81bdc8fd
YCH
3350 ret = ''
3351 while num:
3352 ret = table[num % n] + ret
3353 num = num // n
3354 return ret
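# Illustrative values for encode_base_n() (a sketch using the default table):
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> encode_base_n(62, 62)
#   '10'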
f52354a8
YCH
3355
3356
3357def decode_packed_codes(code):
06b3fe29 3358 mobj = re.search(PACKED_CODES_RE, code)
f52354a8
YCH
3359 obfuscated_code, base, count, symbols = mobj.groups()
3360 base = int(base)
3361 count = int(count)
3362 symbols = symbols.split('|')
3363 symbol_table = {}
3364
3365 while count:
3366 count -= 1
5eb6bdce 3367 base_n_count = encode_base_n(count, base)
f52354a8
YCH
3368 symbol_table[base_n_count] = symbols[count] or base_n_count
3369
3370 return re.sub(
3371 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
3372 obfuscated_code)
e154c651 3373
3374
3375def parse_m3u8_attributes(attrib):
3376 info = {}
3377 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
3378 if val.startswith('"'):
3379 val = val[1:-1]
3380 info[key] = val
3381 return info
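# Illustrative value for parse_m3u8_attributes() (a sketch; the attribute list
# is a made-up EXT-X-STREAM-INF example and the key order of the result may vary):
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'}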
1143535d
YCH
3382
3383
3384def urshift(val, n):
3385 return val >> n if val >= 0 else (val + 0x100000000) >> n
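# Illustrative value for urshift() (a sketch; it emulates a 32-bit unsigned
# right shift for negative inputs):
#   >>> urshift(-1, 28)
#   15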
d3f8e038
YCH
3386
3387
3388# Based on png2str() written by @gdkchan and improved by @yokrysty
3389# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
3390def decode_png(png_data):
3391 # Reference: https://www.w3.org/TR/PNG/
3392 header = png_data[8:]
3393
3394 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
3395 raise IOError('Not a valid PNG file.')
3396
3397 int_map = {1: '>B', 2: '>H', 4: '>I'}
3398 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
3399
3400 chunks = []
3401
3402 while header:
3403 length = unpack_integer(header[:4])
3404 header = header[4:]
3405
3406 chunk_type = header[:4]
3407 header = header[4:]
3408
3409 chunk_data = header[:length]
3410 header = header[length:]
3411
3412 header = header[4:] # Skip CRC
3413
3414 chunks.append({
3415 'type': chunk_type,
3416 'length': length,
3417 'data': chunk_data
3418 })
3419
3420 ihdr = chunks[0]['data']
3421
3422 width = unpack_integer(ihdr[:4])
3423 height = unpack_integer(ihdr[4:8])
3424
3425 idat = b''
3426
3427 for chunk in chunks:
3428 if chunk['type'] == b'IDAT':
3429 idat += chunk['data']
3430
3431 if not idat:
3432 raise IOError('Unable to read PNG data.')
3433
3434 decompressed_data = bytearray(zlib.decompress(idat))
3435
3436 stride = width * 3
3437 pixels = []
3438
3439 def _get_pixel(idx):
3440 x = idx % stride
3441 y = idx // stride
3442 return pixels[y][x]
3443
3444 for y in range(height):
3445 basePos = y * (1 + stride)
3446 filter_type = decompressed_data[basePos]
3447
3448 current_row = []
3449
3450 pixels.append(current_row)
3451
3452 for x in range(stride):
3453 color = decompressed_data[1 + basePos + x]
3454 basex = y * stride + x
3455 left = 0
3456 up = 0
3457
3458 if x > 2:
3459 left = _get_pixel(basex - 3)
3460 if y > 0:
3461 up = _get_pixel(basex - stride)
3462
3463 if filter_type == 1: # Sub
3464 color = (color + left) & 0xff
3465 elif filter_type == 2: # Up
3466 color = (color + up) & 0xff
3467 elif filter_type == 3: # Average
3468 color = (color + ((left + up) >> 1)) & 0xff
3469 elif filter_type == 4: # Paeth
3470 a = left
3471 b = up
3472 c = 0
3473
3474 if x > 2 and y > 0:
3475 c = _get_pixel(basex - stride - 3)
3476
3477 p = a + b - c
3478
3479 pa = abs(p - a)
3480 pb = abs(p - b)
3481 pc = abs(p - c)
3482
3483 if pa <= pb and pa <= pc:
3484 color = (color + a) & 0xff
3485 elif pb <= pc:
3486 color = (color + b) & 0xff
3487 else:
3488 color = (color + c) & 0xff
3489
3490 current_row.append(color)
3491
3492 return width, height, pixels
efa97bdc
YCH
3493
3494
3495def write_xattr(path, key, value):
3496 # This mess below finds the best xattr tool for the job
3497 try:
3498 # try the pyxattr module...
3499 import xattr
3500
53a7e3d2
YCH
3501 if hasattr(xattr, 'set'): # pyxattr
3502 # Unicode arguments are not supported in python-pyxattr until
3503 # version 0.5.0
3504 # See https://github.com/rg3/youtube-dl/issues/5498
3505 pyxattr_required_version = '0.5.0'
3506 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
3507 # TODO: fallback to CLI tools
3508 raise XAttrUnavailableError(
3509 'python-pyxattr is detected but is too old. '
3510 'youtube-dl requires %s or above while your version is %s. '
3511 'Falling back to other xattr implementations' % (
3512 pyxattr_required_version, xattr.__version__))
3513
3514 setxattr = xattr.set
3515 else: # xattr
3516 setxattr = xattr.setxattr
efa97bdc
YCH
3517
3518 try:
53a7e3d2 3519 setxattr(path, key, value)
efa97bdc
YCH
3520 except EnvironmentError as e:
3521 raise XAttrMetadataError(e.errno, e.strerror)
3522
3523 except ImportError:
3524 if compat_os_name == 'nt':
3525 # Write xattrs to NTFS Alternate Data Streams:
3526 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
3527 assert ':' not in key
3528 assert os.path.exists(path)
3529
3530 ads_fn = path + ':' + key
3531 try:
3532 with open(ads_fn, 'wb') as f:
3533 f.write(value)
3534 except EnvironmentError as e:
3535 raise XAttrMetadataError(e.errno, e.strerror)
3536 else:
3537 user_has_setfattr = check_executable('setfattr', ['--version'])
3538 user_has_xattr = check_executable('xattr', ['-h'])
3539
3540 if user_has_setfattr or user_has_xattr:
3541
3542 value = value.decode('utf-8')
3543 if user_has_setfattr:
3544 executable = 'setfattr'
3545 opts = ['-n', key, '-v', value]
3546 elif user_has_xattr:
3547 executable = 'xattr'
3548 opts = ['-w', key, value]
3549
3550 cmd = ([encodeFilename(executable, True)] +
3551 [encodeArgument(o) for o in opts] +
3552 [encodeFilename(path, True)])
3553
3554 try:
3555 p = subprocess.Popen(
3556 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
3557 except EnvironmentError as e:
3558 raise XAttrMetadataError(e.errno, e.strerror)
3559 stdout, stderr = p.communicate()
3560 stderr = stderr.decode('utf-8', 'replace')
3561 if p.returncode != 0:
3562 raise XAttrMetadataError(p.returncode, stderr)
3563
3564 else:
3565 # On Unix, but we can't find pyxattr, setfattr, or xattr.
3566 if sys.platform.startswith('linux'):
3567 raise XAttrUnavailableError(
3568 "Couldn't find a tool to set the xattrs. "
3569 "Install either the python 'pyxattr' or 'xattr' "
3570 "modules, or the GNU 'attr' package "
3571 "(which contains the 'setfattr' tool).")
3572 else:
3573 raise XAttrUnavailableError(
3574 "Couldn't find a tool to set the xattrs. "
3575 "Install either the python 'xattr' module, "
3576 "or the 'xattr' binary.")