]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[YoutubeDL] Include rtmpdump in exe versions -v output
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
912b38b4 4import calendar
676eb3f2 5import codecs
62e609ab 6import contextlib
e3946f98 7import ctypes
c496ca96
PH
8import datetime
9import email.utils
f45c185f 10import errno
e68301af 11import getpass
d77c3dfd 12import gzip
b7ab0590 13import itertools
03f9daab 14import io
f4bfd65f 15import json
d77c3dfd 16import locale
02dbf93f 17import math
d77c3dfd 18import os
4eb7f1d1 19import pipes
c496ca96 20import platform
d77c3dfd 21import re
13ebea79 22import ssl
c496ca96 23import socket
b53466e1 24import struct
1c088fa8 25import subprocess
d77c3dfd 26import sys
181c8655 27import tempfile
01951dda 28import traceback
bcf89ce6 29import xml.etree.ElementTree
d77c3dfd 30import zlib
d77c3dfd 31
01ba00ca 32try:
59ae15a5 33 import urllib.request as compat_urllib_request
01ba00ca 34except ImportError: # Python 2
59ae15a5 35 import urllib2 as compat_urllib_request
01ba00ca
PH
36
37try:
59ae15a5 38 import urllib.error as compat_urllib_error
01ba00ca 39except ImportError: # Python 2
59ae15a5 40 import urllib2 as compat_urllib_error
01ba00ca
PH
41
42try:
59ae15a5 43 import urllib.parse as compat_urllib_parse
01ba00ca 44except ImportError: # Python 2
59ae15a5 45 import urllib as compat_urllib_parse
01ba00ca 46
799c0763
PH
47try:
48 from urllib.parse import urlparse as compat_urllib_parse_urlparse
49except ImportError: # Python 2
50 from urlparse import urlparse as compat_urllib_parse_urlparse
51
6543f0dc
JMF
52try:
53 import urllib.parse as compat_urlparse
54except ImportError: # Python 2
55 import urlparse as compat_urlparse
56
01ba00ca 57try:
59ae15a5 58 import http.cookiejar as compat_cookiejar
01ba00ca 59except ImportError: # Python 2
59ae15a5 60 import cookielib as compat_cookiejar
01ba00ca 61
3e669f36 62try:
59ae15a5 63 import html.entities as compat_html_entities
9f37a959 64except ImportError: # Python 2
59ae15a5 65 import htmlentitydefs as compat_html_entities
3e669f36 66
a8156c1d 67try:
59ae15a5 68 import html.parser as compat_html_parser
9f37a959 69except ImportError: # Python 2
59ae15a5 70 import HTMLParser as compat_html_parser
a8156c1d 71
348d0a7a 72try:
59ae15a5 73 import http.client as compat_http_client
9f37a959 74except ImportError: # Python 2
59ae15a5 75 import httplib as compat_http_client
348d0a7a 76
2eabb802 77try:
0e283428 78 from urllib.error import HTTPError as compat_HTTPError
2eabb802
PH
79except ImportError: # Python 2
80 from urllib2 import HTTPError as compat_HTTPError
81
e0df6211
PH
82try:
83 from urllib.request import urlretrieve as compat_urlretrieve
84except ImportError: # Python 2
85 from urllib import urlretrieve as compat_urlretrieve
86
87
5910e210
PH
88try:
89 from subprocess import DEVNULL
90 compat_subprocess_get_DEVNULL = lambda: DEVNULL
91except ImportError:
92 compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
93
try:
    from urllib.parse import unquote as compat_urllib_parse_unquote
except ImportError:  # Python 2
    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode.

        Consecutive escapes are collected as raw bytes and decoded together
        with the given encoding/errors, so multi-byte sequences are decoded
        as one unit.  A '%' that is not followed by two hexadecimal digits
        is kept literally.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except (TypeError, ValueError):
                # Python 2's hex codec signals malformed input (non-hex
                # digits, odd length) with TypeError, not ValueError; treat
                # both as "not an escape" and keep the text verbatim.
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string
131
f1f725c6
PH
132
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split a query string into a list of decoded (name, value) pairs."""
        to_text = unicode
        # Both '&' and ';' separate fields.
        fields = [part for chunk in qs.split('&') for part in chunk.split(';')]
        decoded = []
        for field in fields:
            if not (field or strict_parsing):
                continue
            pieces = field.split('=', 1)
            if len(pieces) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                pieces.append('')
            if not (len(pieces[1]) or keep_blank_values):
                continue
            name = compat_urllib_parse_unquote(
                pieces[0].replace('+', ' '), encoding=encoding, errors=errors)
            name = to_text(name)
            value = compat_urllib_parse_unquote(
                pieces[1].replace('+', ' '), encoding=encoding, errors=errors)
            value = to_text(value)
            decoded.append((name, value))
        return decoded

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to a list of values."""
        grouped = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            grouped.setdefault(name, []).append(value)
        return grouped
348d0a7a 179
3e669f36 180try:
59ae15a5 181 compat_str = unicode # Python 2
3e669f36 182except NameError:
59ae15a5 183 compat_str = str
3e669f36
PH
184
185try:
59ae15a5 186 compat_chr = unichr # Python 2
3e669f36 187except NameError:
59ae15a5 188 compat_chr = chr
3e669f36 189
f7300c5c
JMF
190try:
191 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
192except ImportError: # Python 2.6
193 from xml.parsers.expat import ExpatError as compat_xml_parse_error
194
8d31fa3c
PH
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Wrap s in single quotes, escaping any single quote it contains."""
        escaped = s.replace("'", "'\"'\"'")
        return "'" + escaped + "'"
200
201
b31756c1
FV
def compat_ord(c):
    """Return c unchanged if it is already an int, otherwise ord(c)."""
    return c if type(c) is int else ord(c)
205
4644ac55 206
4644ac55
S
if sys.version_info >= (3, 0):
    compat_getenv = os.getenv
    compat_expanduser = os.path.expanduser
else:
    # Environment variables should be decoded with filesystem encoding.
    # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)

    def compat_getenv(key, default=None):
        """Like os.getenv, but decode a non-empty result with the filesystem encoding."""
        env = os.getenv(key, default)
        if env:
            env = env.decode(get_filesystem_encoding())
        return env

    # HACK: The default implementations of os.path.expanduser from cpython do not decode
    # environment variables with filesystem encoding. We will work around this by
    # providing adjusted implementations.
    # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
    # for different platforms with correct environment variables decoding.

    if os.name == 'posix':
        def compat_expanduser(path):
            """Expand ~ and ~user constructions.  If user or $HOME is unknown,
            do nothing."""
            if not path.startswith('~'):
                return path
            i = path.find('/', 1)
            if i < 0:
                i = len(path)
            if i == 1:
                # Bare "~": current user's home
                if 'HOME' not in os.environ:
                    import pwd
                    userhome = pwd.getpwuid(os.getuid()).pw_dir
                else:
                    userhome = compat_getenv('HOME')
            else:
                # "~user": look the user up in the password database
                import pwd
                try:
                    pwent = pwd.getpwnam(path[1:i])
                except KeyError:
                    return path
                userhome = pwent.pw_dir
            userhome = userhome.rstrip('/')
            return (userhome + path[i:]) or '/'
    elif os.name == 'nt' or os.name == 'ce':
        def compat_expanduser(path):
            """Expand ~ and ~user constructs.

            If user or $HOME is unknown, do nothing."""
            if path[:1] != '~':
                return path
            i, n = 1, len(path)
            while i < n and path[i] not in '/\\':
                i = i + 1

            if 'HOME' in os.environ:
                userhome = compat_getenv('HOME')
            elif 'USERPROFILE' in os.environ:
                userhome = compat_getenv('USERPROFILE')
            elif 'HOMEPATH' not in os.environ:
                return path
            else:
                # compat_getenv returns None (it does not raise KeyError)
                # when HOMEDRIVE is unset; fall back to an empty drive.
                drive = compat_getenv('HOMEDRIVE') or ''
                userhome = os.path.join(drive, compat_getenv('HOMEPATH'))

            if i != 1:  # ~user
                userhome = os.path.join(os.path.dirname(userhome), path[1:i])

            return userhome + path[i:]
    else:
        compat_expanduser = os.path.expanduser
4644ac55
S
280
281
468e2e92
FV
282# This is not clearly defined otherwise
283compiled_regex_type = type(re.compile(''))
284
3e669f36 285std_headers = {
ae8f7871 286 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
59ae15a5
PH
287 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
288 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
289 'Accept-Encoding': 'gzip, deflate',
290 'Accept-Language': 'en-us,en;q=0.5',
3e669f36 291}
f427df17 292
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    Falls back to 'UTF-8' when the locale's encoding cannot be determined
    or cannot encode a simple test string.
    """
    try:
        pref = locale.getpreferredencoding()
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (unknown codec, broken locale, ...), but do not
        # swallow KeyboardInterrupt/SystemExit like a bare except would.
        pref = 'UTF-8'

    return pref
d77c3dfd 306
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the text string s (must be exactly the unicode text type)."""
        assert type(u'') == type(s)
        print(s)
else:
    def compat_print(s):
        """Print s encoded in the preferred encoding; unencodable characters
        become XML character references."""
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
d77c3dfd 314
f4bfd65f 315
181c8655
PH
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The data is first written to a temporary file in the same directory as
    fn, which is then renamed over fn.  If anything fails, the temporary
    file is removed and the exception is re-raised.
    """

    args = {
        'suffix': '.tmp',
        'prefix': os.path.basename(fn) + '.',
        'dir': os.path.dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**args)

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # os.rename refuses to overwrite an existing file on Windows,
            # so the old file has to be removed first.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except:
        # Bare except on purpose: clean up on any failure (including
        # interrupts), then re-raise the original exception.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
348
349
if sys.version_info < (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the first element matching xpath whose attribute key equals val """
        # Here comes the crazy part: In 2.6, if the xpath is a unicode,
        # .//node does not match if a node is a direct child of . !
        if isinstance(xpath, unicode):
            xpath = xpath.encode('ascii')

        candidates = (el for el in node.findall(xpath)
                      if el.attrib.get(key) == val)
        return next(candidates, None)
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # Only allow keys/values that are safe to embed in the expression
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        predicate = u"[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
368
d7e66d39
JMF
369# On python2.6 the xml.etree.ElementTree.Element methods don't support
370# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand every "prefix:tag" step of path to "{namespace}tag" using ns_map.

    Steps without a prefix are kept as they are.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
381
d77c3dfd 382
def xpath_text(node, xpath, name=None, fatal=False):
    """Return the text of the first element matching xpath below node.

    If nothing matches, return None, or raise ExtractorError when fatal is
    set (name, if given, replaces the xpath in the error message).
    """
    if sys.version_info < (2, 7):  # Crazy 2.6
        xpath = xpath.encode('ascii')

    found = node.find(xpath)
    if found is not None:
        return found.text
    if not fatal:
        return None
    name = xpath if name is None else name
    raise ExtractorError('Could not find XML element %s' % name)
395
396
a8156c1d 397compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
a921f407
JMF
class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser that remembers the document it was given via loads()."""

    def __init__(self):
        # Was misspelled "__init", so it never ran as the constructor and
        # self.html stayed undefined until loads() was called.
        compat_html_parser.HTMLParser.__init__(self)
        self.html = None

    def loads(self, html):
        """Store html and parse it completely (feed + close)."""
        self.html = html
        self.feed(html)
        self.close()
407
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""

    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # result grows to [tag, start position, end position]
        self.result = None
        self.started = False
        # Number of currently open tags, per tag name, since the match
        self.depth = {}
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Skip the offending line and resume parsing; give up after too many
        errors or once the wanted tag has been entered."""
        give_up = self.error_count > 10 or self.started
        if give_up:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        remaining_lines = self.html.split('\n')[self.getpos()[0]:]
        self.rawdata = '\n'.join(remaining_lines)  # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs:
            if attrs[self.attribute] == self.value:
                self.result = [tag]
                self.started = True
                self.watch_startpos = True
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        # The matched tag has been closed: record where it ends
        if self.depth[self.result[0]] == 0:
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if not self.watch_startpos:
            return
        self.watch_startpos = False
        self.result.append(self.getpos())

    # Any content event following the opening tag marks the start position
    handle_entityref = find_startpos
    handle_charref = find_startpos
    handle_data = find_startpos
    handle_comment = find_startpos
    handle_decl = find_startpos
    handle_pi = find_startpos
    unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped text between the recorded start and end
        positions, or None if no complete match was recorded."""
        if self.result is None or len(self.result) != 3:
            return None
        start, end = self.result[1], self.result[2]
        lines = self.html.split('\n')[start[0] - 1:end[0]]
        lines[0] = lines[0][start[1]:]
        if len(lines) == 1:
            # Same line: the end column has to be shifted by the cut prefix
            lines[-1] = lines[-1][:end[1] - start[1]]
        lines[-1] = lines[-1][:end[1]]
        return '\n'.join(lines).strip()
3b024e17
PH
467# Hack for https://github.com/rg3/youtube-dl/issues/662
468if sys.version_info < (2, 7, 3):
469 AttrParser.parse_endtag = (lambda self, i:
470 i + len("</scr'+'ipt>")
471 if self.rawdata[i:].startswith("</scr'+'ipt>")
472 else compat_html_parser.HTMLParser.parse_endtag(self, i))
9e6dd238
FV
473
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals id in the passed HTML document"""
    element_id = id
    return get_element_by_attribute("id", element_id, html)
477
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag whose given attribute has the given value in the passed HTML document"""
    finder = AttrParser(attribute, value)
    try:
        finder.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: use whatever was collected before the parse error
        pass
    return finder.get_result()
9e6dd238 486
a921f407
JMF
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that picks the content attribute of the meta tag
    with the specified name attribute.
    """

    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag == 'meta':
            attr_map = dict(attrs)
            if attr_map.get('name') == self.name:
                # A later matching meta tag overrides an earlier one
                self.result = attr_map.get('content')

    def get_result(self):
        return self.result
507
def get_meta_content(name, html):
    """
    Return the content attribute of the meta tag whose name attribute is name.
    """
    meta_parser = MetaParser(name)
    try:
        meta_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Best effort: keep what was found before the parse error
        pass
    return meta_parser.get_result()
518
9e6dd238
FV
519
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Line breaks in the source are ignored; <br> tags and paragraph
    boundaries become newlines, all other tags are dropped and HTML
    entities are replaced.  None is passed through unchanged.
    """
    if html is None:  # Nothing to clean (e.g. an optional field that is absent)
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
9e6dd238
FV
531
532
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename u'-' stands for standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Only the final path component is rewritten: touching the
        # directory part would replace its separators (and a drive colon)
        # and point to a different location.
        head, tail = os.path.split(filename)
        alt_filename = os.path.join(
            head, re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', tail))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
566
567
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None if the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 575
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def replace_insane(char):
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    result = u''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores and trim them at both ends
    result = re.sub(u'_{2,}', u'_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    return result or '_'
d77c3dfd
FV
607
def orderedSet(iterable):
    """ Return a list of the elements of iterable without duplicates, in order of first appearance """
    unique = []
    for item in iterable:
        # List membership (not a set) so that unhashable items work as well
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 615
912b38b4 616
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'.  Named entities and numeric
    references (decimal "#123" or hexadecimal "#x7B") are decoded; anything
    else is returned in its literal "&...;" form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # Hexadecimal references may contain a-f as well; matching only decimal
    # digits would decode e.g. "#x2F" as chr(0x2).
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            numstr = u'0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Number is not a valid code point: keep the literal text
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
635
636
def unescapeHTML(s):
    """Replace the HTML entities in the text string s; None is passed through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', decode_entity, s)
d77c3dfd 644
8bf48f23
PH
645
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the result is meant for a subprocess call
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Windows 2000 and up have Unicode APIs
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if unicode_windows and not for_subprocess:
        # Pass u'' directly to use the Unicode APIs
        return s

    if unicode_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
672
f07b74fc
PH
673
def encodeArgument(s):
    """Encode a command-line argument for use in a subprocess call."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
681
682
8271226a
PH
def decodeOption(optval):
    """Return optval as text, decoding bytes with the preferred encoding; None is passed through."""
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 691
4539dd30
PH
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The shortest form that fits is used: durations of an hour or more get
    the hours field, durations of a minute or more get the minutes field.
    """
    # ">=" so that exactly one hour is '1:00:00' (not '60:00') and exactly
    # one minute is '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
699
a0ddb8a2
PH
700
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    """Create the HTTPS handler to install in the URL opener.

    opts_no_check_certificate disables certificate verification where the
    running Python version supports choosing.  Remaining keyword arguments
    are passed on to the handler's constructor.
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                # Prefer TLSv1, fall back to SSLv23 negotiation
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
        # We are the client and authenticate the *server*.  (CLIENT_AUTH
        # builds a context for server-side sockets, which verifies neither
        # certificate nor hostname.)
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
        if opts_no_check_certificate:
            # check_hostname must be disabled before verification can be
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
ea6d901e 740
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is mentioned in the message; video_id, if given, is put in front of it.
        """
        # Errors raised while handling one of these exceptions count as expected
        expected_causes = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_causes:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += u' (caused by %r)' % cause
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as a string, or None if there is none."""
        if self.traceback is not None:
            return u''.join(traceback.format_tb(self.traceback))
        return None
767
1c256f70 768
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Extraction error raised when a regular expression did not match."""
772
773
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception if they are not
    configured to continue on errors. It carries the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
d77c3dfd
FV
785
786
class SameFileError(Exception):
    """Same File exception.

    FileDownloader objects throw this exception if they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
794
795
class PostProcessingError(Exception):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this exception to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # The message is kept on the instance as .msg
        self.msg = msg
d77c3dfd
FV
804
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
808
809
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format
    that is not available for that video.
    """
d77c3dfd
FV
817
818
class ContentTooShortError(Exception):
    """Content Too Short exception.

    FileDownloader objects may raise this exception when a file they
    download is smaller than what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Class-level defaults; both sizes are in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 833
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed with an OpenerDirector, this class adds the standard
    headers to every HTTP request and transparently decodes gzipped and
    deflated responses. To avoid compression for a particular request, the
    code creating the request only has to include the HTTP header
    "Youtubedl-No-Compression", which is removed before the real request
    is made. Likewise, a "Youtubedl-user-agent" header replaces the
    User-Agent and is then removed.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress data as a raw deflate stream, else as a zlib stream."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response, also where its constructor takes no code."""
        addinfourl = compat_urllib_request.addinfourl
        if not hasattr(addinfourl, 'getcode'):
            wrapped = addinfourl(stream, headers, url)
            wrapped.code = code
            return wrapped
        return addinfourl(stream, headers, url, code)

    @staticmethod
    def _gunzip(content):
        """Return the gzip-decompressed content, tolerating trailing junk."""
        gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
        try:
            return gz.read()
        except IOError as original_ioerror:
            # There may be junk add the end of the file
            # See http://stackoverflow.com/q/4928560/35070 for details
            for trim in range(1, 1024):
                try:
                    gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                    return gz.read()
                except IOError:
                    continue
            raise original_ioerror

    def http_request(self, req):
        for name, value in std_headers.items():
            if name not in req.headers:
                req.add_header(name, value)

        headers = req.headers
        if 'Youtubedl-no-compression' in headers:
            headers.pop('Accept-encoding', None)
            del headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in headers:
            headers.pop('User-agent', None)
            headers['User-agent'] = headers.pop('Youtubedl-user-agent')

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            uncompressed = io.BytesIO(self._gunzip(resp.read()))
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic is processed in exactly the same way
    https_request = http_request
    https_response = http_response
bf50b038 919
5de90176 920
def parse_iso8601(date_str, delimiter='T'):
    """ Return a UNIX timestamp from the given date

    delimiter is the separator between the date and the time part.
    A trailing 'Z' or UTC offset (optionally preceded by fractional
    seconds) is stripped; an offset is applied to the result.
    """

    if date_str is None:
        return None

    tz_match = re.search(
        r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    offset = datetime.timedelta()
    if tz_match:
        # Cut the matched suffix off, so that strptime sees the bare date
        date_str = date_str[:-len(tz_match.group(0))]
        if tz_match.group('sign'):
            direction = 1 if tz_match.group('sign') == '+' else -1
            offset = datetime.timedelta(
                hours=direction * int(tz_match.group('hours')),
                minutes=direction * int(tz_match.group('minutes')))

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    utc_dt = datetime.datetime.strptime(date_str, date_format) - offset
    return calendar.timegm(utc_dt.timetuple())
944
945
bf50b038
JMF
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    cleaned = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', cleaned)

    known_formats = (
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%d/%m/%y',
        '%Y/%m/%d %H:%M:%S',
        '%d/%m/%Y %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    )

    result = None
    # Every format is tried; when several match, the last one wins.
    for fmt in known_formats:
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to the RFC 2822 style parser
        fields = email.utils.parsedate_tz(cleaned)
        if fields:
            result = datetime.datetime(*fields[:6]).strftime('%Y%m%d')
    return result
993
def determine_ext(url, default_ext=u'unknown_video'):
    """ Guess the file extension from a URL, or return default_ext.

    The guess is whatever follows the last '.' in the part of the URL
    before the first '?'; it is only accepted if it is purely alphanumeric.
    """
    if url is None:
        return default_ext
    before_query = url.partition(u'?')[0]
    candidate = before_query.rpartition(u'.')[2]
    return candidate if re.match(r'^[A-Za-z0-9]+$', candidate) else default_ext
73e79f2a 1002
d4051a8e
JMF
def subtitles_filename(filename, sub_lang, sub_format):
    """ Replace the last extension of filename with '<sub_lang>.<sub_format>' """
    stem = filename.rsplit('.', 1)[0]
    return stem + u'.' + sub_lang + u'.' + sub_format
1005
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today

    relative = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?',
        date_str)
    if relative is None:
        # Not a relative expression: must be an absolute YYYYMMDD date
        return datetime.datetime.strptime(date_str, "%Y%m%d").date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount

    unit = relative.group('unit')
    # timedelta has no months or years, so approximate them in days
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'

    return today + datetime.timedelta(**{unit + 's': amount})
1031
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        # Anything that is not exactly eight digits is passed through
        return date_str
    return '-'.join(parts.groups())
1040
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # A missing bound means "unbounded" on that side
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date
        if not isinstance(candidate, datetime.date):
            candidate = date_from_str(candidate)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
1066
1067
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Byte strings are decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
1076
1077
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The special method is the WriteConsoleW function from kernel32; it is
    only used when out has file descriptor 1 or 2 and the corresponding
    standard handle is a console. Raises OSError if WriteConsoleW fails."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (presumably STD_OUTPUT_HANDLE / STD_ERROR_HANDLE - confirm against
    # the Windows API documentation)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle counts as a console only if it is valid, its file type
        # (ignoring the REMOTE bit) is CHAR and GetConsoleMode succeeds.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters that stop before the next
    # non-BMP character; such a character is then written on its own with
    # a length of 2.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
1148
1149
def write_string(s, out=None, encoding=None):
    """ Write the text s to out (sys.stderr by default) and flush it.

    Streams that take bytes get s encoded with encoding (falling back to
    the stream's own encoding or the preferred encoding); characters that
    cannot be encoded are dropped.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    # Python 2 lies about mode of sys.stderr
    needs_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if needs_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with a binary layer underneath: encode ourselves
        target_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
1170
1171
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string into a list of integer byte values """
    if not bs:
        return []
    first = bs[0]
    # Indexing bytes yields ints on Python 3 but 1-char strings on Python 2
    return list(bs) if isinstance(first, int) else [ord(c) for c in bs]
1179
c257baff 1180
def intlist_to_bytes(xs):
    """ Convert a list of integer byte values into a byte string """
    if not xs:
        return b''
    # chr() returns a byte string only on Python 2
    running_py2 = isinstance(chr(0), bytes)
    if running_py2:
        return ''.join([chr(x) for x in xs])
    return bytes(xs)
c38b1e77
PH
1188
1189
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx from kernel32 on Windows, fcntl.flock
# everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the structure passed to LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high words of the byte count to lock, starting at offset 0
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock the file object f; raises OSError if LockFileEx fails """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # Flag 0x2 is passed for exclusive locks (presumably
        # LOCKFILE_EXCLUSIVE_LOCK - confirm against the LockFileEx docs)
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Release the lock taken by _lock_file; raises OSError on failure """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """ Take an exclusive or shared flock on f """
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """ Release the flock held on f """
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1253
1254
class locked_file(object):
    """ File wrapper that holds a lock on the file inside a with-block.

    The file is opened by the constructor with io.open; entering the
    context takes a shared lock for mode 'r' and an exclusive lock for
    'a' and 'w'. Leaving the context unlocks and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The Windows implementation of _lock_file raises OSError,
            # which is not an IOError on Python 2; close the file in both
            # cases so a failed lock does not leak the handle.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            # Close even if unlocking failed
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1284
1285
4644ac55
S
def get_filesystem_encoding():
    """ Return sys.getfilesystemencoding(), or 'utf-8' if that is None """
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
1289
1290
def shell_quote(args):
    """ Quote every argument with pipes.quote and join them with spaces """
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return u' '.join([pipes.quote(as_text(a)) for a in args])
9d4660ca
PH
1300
1301
f4d96df0
PH
def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
        (the first element so that Not pred(e)) """
    for item in seq:
        yield item
        if pred(item):
            continue
        # The first failing element has already been yielded; stop here
        break
1309
1310
9d4660ca
PH
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # The data travels JSON-encoded in the URL fragment
    payload = {u'__youtubedl_smuggle': json.dumps(data)}
    return url + u'#' + compat_urllib_parse.urlencode(payload)
1317
1318
def unsmuggle_url(smug_url, default=None):
    """ Split a URL into (url, data), undoing smuggle_url.

    URLs without smuggled data are returned unchanged together with
    default.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition(u'#')
    encoded = compat_parse_qs(fragment)[u'__youtubedl_smuggle'][0]
    return url, json.loads(encoded)
02dbf93f
PH
1326
1327
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count as a string with a binary unit suffix.

    bytes may be a number, a str holding a number, or None (which yields
    u'N/A'). The value is shown with two decimals in the largest unit
    from B to YiB that keeps it at 1 or above; values below one byte are
    shown in B and values beyond the YiB range stay in YiB.
    """
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)

    suffixes = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the index inside the suffix table: a negative exponent
        # (tiny fractions) would otherwise wrap around to 'YiB', and a
        # huge value would raise IndexError.
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffixes[exponent])
f53c966a 1340
1c088fa8 1341
def get_term_width():
    """ Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable takes precedence; otherwise the
    second field of the output of `stty size` is used.
    """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best effort only (stty missing, no tty, unexpected output, ...).
        # Unlike a bare except this lets KeyboardInterrupt and SystemExit
        # propagate.
        pass
    return None
caefb1de
PH
1356
1357
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """

    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    if name not in ENGLISH_NAMES:
        # Unknown names (including wrong capitalisation) give None
        return None
    return ENGLISH_NAMES.index(name) + 1
18258362
JMF
1368
1369
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start an entity by '&amp;' in XML"""
    # The five predefined entities and numeric references with up to four
    # digits are left alone.
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, u'&amp;', xml_str)
e3946f98
PH
1376
1377
def setproctitle(title):
    """ Set the process name through prctl() from libc.so.6.

    Does nothing if libc.so.6 cannot be loaded or has no prctl symbol.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes object makes it one byte
    # larger than the title and NUL-terminated. A buffer of exactly
    # len(title_bytes) has no terminator, so prctl could read past it.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # Option 15 (presumably PR_SET_NAME - confirm against prctl(2))
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1391
1392
def remove_start(s, start):
    """ Return s without the prefix start (s itself if it has no such prefix) """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1397
1398
2b9faf55
PH
def remove_end(s, end):
    """ Return s without the suffix end (s itself if it has no such suffix) """
    # An empty suffix must be special-cased: every string ends with '',
    # and s[:-0] is the empty string rather than s.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1403
1404
def url_basename(url):
    """ Return the last path segment of url, ignoring surrounding slashes """
    parsed = compat_urlparse.urlparse(url)
    segments = parsed.path.strip(u'/').split(u'/')
    return segments[-1]
aa94a6d3
PH
1408
1409
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always HEAD """

    def get_method(self):
        return "HEAD"
7217e148
PH
1413
1414
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to int, or return default if v is None or ''.

    With get_attr, the attribute of that name is read from v first
    (a missing attribute counts as None). The integer is multiplied by
    invscale and then floor-divided by scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1422
9572013d 1423
40a90862
JMF
def str_or_none(v, default=None):
    """ Convert v to compat_str, or return default if v is None """
    if v is None:
        return default
    return compat_str(v)
1426
9732d77e
PH
1427
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Drop thousands separators and plus signs before converting
    digits = re.sub(r'[,\.\+]', u'', int_str)
    return int(digits)
608d11f5
PH
1434
1435
9732d77e
PH
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to float times invscale divided by scale; default if v is None """
    if v is None:
        return default
    return float(v) * invscale / scale
43f775e4
PH
1438
1439
608d11f5
PH
def parse_duration(s):
    """ Parse a duration such as '1:02:03', '3h11m53s' or '12.5 s'.

    Returns the number of seconds (a float when a fractional part is
    present), or None if s is None or not recognised.
    """
    if s is None:
        return None

    m = re.match(
        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$',
        s.strip())
    if not m:
        return None

    total = int(m.group('secs'))
    for group, seconds_per_unit in (('mins', 60), ('hours', 60 * 60)):
        if m.group(group):
            total += int(m.group(group)) * seconds_per_unit
    if m.group('ms'):
        # The group includes the leading dot, e.g. '.05'
        total += float(m.group('ms'))
    return total
91d7d0b3
JMF
1458
1459
def prepend_extension(filename, ext):
    """ Insert ext before the real extension: 'a.mp4', 'tmp' -> 'a.tmp.mp4' """
    stem, real_ext = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(stem, ext, real_ext)
d70ad093
PH
1463
1464
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        # Starting the process failed
        return False
    return exe
b7ab0590
PH
1473
1474
95807118
PH
def get_exe_version(exe, args=['--version'],
                    version_re=r'version\s+([-0-9._a-zA-Z]+)',
                    unrecognized=u'present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.

    The executable is run with args and version_re is searched in the
    first line of its combined stdout/stderr output; group 1 of the match
    is the version. If nothing matches, unrecognized is returned.
    """
    # NOTE: the hyphen leads the character class of the default pattern so
    # that it is a literal; written as '_-a' it forms a range that leaves
    # '-' out and cuts versions such as '1.2-3' short.
    try:
        out, err = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
    m = re.search(version_re, firstline)
    if m:
        return m.group(1)
    else:
        return unrecognized
1492
1493
class PagedList(object):
    """ Base class for paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1498
9c44d242
PH
1499
class OnDemandPagedList(PagedList):
    """ Paged list that fetches pages one by one until a short page is seen """

    def __init__(self, pagefunc, pagesize):
        # pagefunc(pagenum) returns an iterable with the entries of a page
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices start..end-1 as a list
        (up to the last available entry if end is None) """
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            # Global indices of the first entry of this and the next page
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of start inside this page, 0 on all later pages
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset of end inside this page, None if end lies beyond it
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1541
1542
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list whose number of pages is known in advance """

    def __init__(self, pagefunc, pagecount, pagesize):
        # pagefunc(pagenum) returns an iterable with the entries of a page
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices start..end-1 as a list
        (up to the last page if end is None) """
        res = []
        start_page = start // self._pagesize
        # Never go past the known number of pages, even if end points
        # beyond the last entry; otherwise pagefunc would be asked for
        # pages that do not exist.
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Entries still wanted, None meaning "all of them"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the request
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1570
1571
def uppercase_escape(s):
    """ Replace every literal \\UXXXXXXXX escape in s by the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns (text, consumed length)
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
b53466e1 1578
d05cfe06
S
1579
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    running_py2 = sys.version_info < (3, 0)
    # On Python 2, unicode strings are UTF-8 encoded before quoting
    if running_py2 and isinstance(s, unicode):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1585
1586
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Scheme and netloc are kept as they are; the rest is escaped
    escaped = {}
    for field in ('path', 'params', 'query', 'fragment'):
        escaped[field] = escape_rfc3986(getattr(parts, field))
    return parts._replace(**escaped).geturl()
1596
b53466e1
PH
# Probe whether struct accepts a unicode format string and define
# struct_pack / struct_unpack accordingly.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        """ struct.pack that also accepts a unicode format string """
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        """ struct.unpack that also accepts a unicode format string """
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # Unicode format strings work; use the plain functions
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1613
1614
def read_batch_urls(batch_fd):
    """ Read the URLs from a batch file object and close it.

    Every line is one URL. Byte lines are decoded as UTF-8, a leading
    byte order mark and surrounding whitespace are removed, and empty
    lines as well as lines starting with '#', ';' or ']' are skipped.
    Returns the list of remaining URLs.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # u'\ufeff' is the BOM as it appears after proper UTF-8 decoding;
        # u'\xef\xbb\xbf' is what its three bytes look like when each one
        # was decoded as a separate character.
        for bom in (u'\xef\xbb\xbf', u'\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1629
1630
def urlencode_postdata(*args, **kargs):
    """ urlencode the arguments and return the result as ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1633
1634
0990305d
PH
# etree_iter(node) iterates over the elements below node: Element.iter
# where it exists, otherwise all descendants found by findall('.//*').
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')
1639
1640
bcf89ce6
PH
def parse_xml(s):
    """ Parse the XML document in the text s and return its root element.

    Doctype declarations are ignored.
    """
    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    kwargs = {}
    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
    if sys.version_info >= (2, 7):
        kwargs['parser'] = parser
    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)

    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            if node.text is not None and not isinstance(node.text, compat_str):
                node.text = node.text.decode('utf-8')
    return tree
e68301af
PH
1656
1657
# compat_getpass: getpass.getpass, except on Python 2 under Windows where
# a unicode prompt is first encoded with the preferred encoding.
if sys.version_info < (3, 0) and sys.platform == 'win32':
    def compat_getpass(prompt, *args, **kwargs):
        if isinstance(prompt, compat_str):
            prompt = prompt.encode(preferredencoding())
        return getpass.getpass(prompt, *args, **kwargs)
else:
    compat_getpass = getpass.getpass
a1a530b0
PH
1665
1666
# Age limit used for each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """ Return the age limit for '18', '18+' or a label from US_RATINGS.

    None is returned for None and for anything that is not recognised.
    """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
146c80e2
S
1681
1682
def strip_jsonp(code):
    """ Remove a JSONP wrapper 'callback( ... );' and return its argument text """
    jsonp_call = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$'
    return re.sub(jsonp_call, r'\1', code)
478c2c61
PH
1685
1686
e05f6939
PH
def js_to_json(code):
    """ Turn a JavaScript object literal into JSON text.

    Single-quoted strings and bare identifiers become double-quoted
    strings, and a comma directly before a closing ']' is removed.
    """
    # How escapes inside a single-quoted string are rewritten once the
    # string is delimited by double quotes instead
    requoted = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        # Literals and double-quoted strings are valid JSON already
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: requoted[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    converted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Drop trailing commas in arrays
    return re.sub(r',(\s*\])', r'\1', converted)
1710
1711
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The position in the list is the quality; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1720
acd69589
PH
1721
# Default output filename template, written with %(name)s placeholders
# (presumably filled from a video's info dict - confirm against callers)
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68
PH
1723
# subprocess.check_output where available, otherwise a minimal replacement
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        """ Run a command and return its stdout.

        Raises subprocess.CalledProcessError (with the captured output in
        its output attribute) if the exit status is non-zero.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # This fallback only runs on interpreters without
            # check_output. There Popen objects have no args attribute and
            # CalledProcessError takes no output argument, so the command
            # is taken from our own arguments and the output is attached
            # afterwards.
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output
a020a0dc
PH
1735
1736
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Leave room for the ellipses so the result is length characters long
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1745
1746
def version_tuple(v):
    """ Split a dotted version string into a list of ints ('1.2.3' -> [1, 2, 3]) """
    return list(map(int, v.split('.')))
1749
1750
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit.

    If version is empty or cannot be compared, the answer follows
    assume_new: an assumed-new version is not outdated.
    """
    when_unknown = not assume_new
    if not version:
        return when_unknown
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        # A component was not an integer
        return when_unknown