]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
Merge remote-tracking branch 'peugeot/eporner'
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
912b38b4 4import calendar
676eb3f2 5import codecs
62e609ab 6import contextlib
e3946f98 7import ctypes
c496ca96
PH
8import datetime
9import email.utils
f45c185f 10import errno
e68301af 11import getpass
d77c3dfd 12import gzip
b7ab0590 13import itertools
03f9daab 14import io
f4bfd65f 15import json
d77c3dfd 16import locale
02dbf93f 17import math
d77c3dfd 18import os
4eb7f1d1 19import pipes
c496ca96 20import platform
d77c3dfd 21import re
13ebea79 22import ssl
c496ca96 23import socket
b53466e1 24import struct
1c088fa8 25import subprocess
d77c3dfd 26import sys
181c8655 27import tempfile
01951dda 28import traceback
bcf89ce6 29import xml.etree.ElementTree
d77c3dfd 30import zlib
d77c3dfd 31
01ba00ca 32try:
59ae15a5 33 import urllib.request as compat_urllib_request
01ba00ca 34except ImportError: # Python 2
59ae15a5 35 import urllib2 as compat_urllib_request
01ba00ca
PH
36
37try:
59ae15a5 38 import urllib.error as compat_urllib_error
01ba00ca 39except ImportError: # Python 2
59ae15a5 40 import urllib2 as compat_urllib_error
01ba00ca
PH
41
42try:
59ae15a5 43 import urllib.parse as compat_urllib_parse
01ba00ca 44except ImportError: # Python 2
59ae15a5 45 import urllib as compat_urllib_parse
01ba00ca 46
799c0763
PH
47try:
48 from urllib.parse import urlparse as compat_urllib_parse_urlparse
49except ImportError: # Python 2
50 from urlparse import urlparse as compat_urllib_parse_urlparse
51
6543f0dc
JMF
52try:
53 import urllib.parse as compat_urlparse
54except ImportError: # Python 2
55 import urlparse as compat_urlparse
56
01ba00ca 57try:
59ae15a5 58 import http.cookiejar as compat_cookiejar
01ba00ca 59except ImportError: # Python 2
59ae15a5 60 import cookielib as compat_cookiejar
01ba00ca 61
3e669f36 62try:
59ae15a5 63 import html.entities as compat_html_entities
9f37a959 64except ImportError: # Python 2
59ae15a5 65 import htmlentitydefs as compat_html_entities
3e669f36 66
a8156c1d 67try:
59ae15a5 68 import html.parser as compat_html_parser
9f37a959 69except ImportError: # Python 2
59ae15a5 70 import HTMLParser as compat_html_parser
a8156c1d 71
348d0a7a 72try:
59ae15a5 73 import http.client as compat_http_client
9f37a959 74except ImportError: # Python 2
59ae15a5 75 import httplib as compat_http_client
348d0a7a 76
2eabb802 77try:
0e283428 78 from urllib.error import HTTPError as compat_HTTPError
2eabb802
PH
79except ImportError: # Python 2
80 from urllib2 import HTTPError as compat_HTTPError
81
e0df6211
PH
82try:
83 from urllib.request import urlretrieve as compat_urlretrieve
84except ImportError: # Python 2
85 from urllib import urlretrieve as compat_urlretrieve
86
87
5910e210
PH
try:
    from subprocess import DEVNULL

    def compat_subprocess_get_DEVNULL():
        """Return subprocess.DEVNULL."""
        return DEVNULL
except ImportError:
    def compat_subprocess_get_DEVNULL():
        """Return a freshly opened, writable handle on os.devnull."""
        return open(os.path.devnull, 'w')
93
try:
    from urllib.parse import unquote as compat_urllib_parse_unquote
except ImportError:
    def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in string by the characters they encode.

        Fallback used when urllib.parse cannot be imported. Runs of
        consecutive percent-encoded bytes are collected and decoded together
        with the given encoding and error handler (None selects 'utf-8' and
        'replace' respectively).
        """
        if string == '':
            return string
        pieces = string.split('%')
        if len(pieces) == 1:
            # No percent sign at all
            return string
        encoding = 'utf-8' if encoding is None else encoding
        errors = 'replace' if errors is None else errors
        # Percent-encoded bytes seen so far that have not been decoded yet
        pending = b''
        string = pieces[0]
        for piece in pieces[1:]:
            try:
                if not piece:
                    raise ValueError
                pending += piece[:2].decode('hex')
                tail = piece[2:]
                if not tail:
                    # Only an encoded byte in this piece; it may belong to a
                    # multi-byte sequence, so keep collecting.
                    continue
            except ValueError:
                tail = '%' + piece
            # Literal text follows: decode what was collected and append both.
            string += pending.decode(encoding, errors) + tail
            pending = b''
        if pending:
            # Decode whatever is left at the end of the input
            string += pending.decode(encoding, errors)
        return string
131
f1f725c6
PH
132
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split a query string into a list of (name, value) unicode pairs."""
        _coerce_result = unicode
        fields = []
        for ampersand_part in qs.split('&'):
            fields.extend(ampersand_part.split(';'))
        parsed = []
        for field in fields:
            if not field and not strict_parsing:
                continue
            parts = field.split('=', 1)
            if len(parts) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                # Handle case of a control-name with no equal sign
                if not keep_blank_values:
                    continue
                parts.append('')
            if not (len(parts[1]) or keep_blank_values):
                continue
            name = compat_urllib_parse_unquote(
                parts[0].replace('+', ' '), encoding=encoding, errors=errors)
            name = _coerce_result(name)
            value = compat_urllib_parse_unquote(
                parts[1].replace('+', ' '), encoding=encoding, errors=errors)
            value = _coerce_result(value)
            parsed.append((name, value))
        return parsed

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to its list of values."""
        by_name = {}
        for name, value in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                      encoding=encoding, errors=errors):
            by_name.setdefault(name, []).append(value)
        return by_name
348d0a7a 179
# Text type: `unicode` where that builtin exists (Python 2), `str` otherwise.
try:
    compat_str = unicode
except NameError:
    compat_str = str

# Code point -> character: `unichr` where that builtin exists (Python 2),
# `chr` otherwise.
try:
    compat_chr = unichr
except NameError:
    compat_chr = chr
3e669f36 189
f7300c5c
JMF
190try:
191 from xml.etree.ElementTree import ParseError as compat_xml_parse_error
192except ImportError: # Python 2.6
193 from xml.parsers.expat import ExpatError as compat_xml_parse_error
194
8d31fa3c
PH
try:
    from shlex import quote as shlex_quote
except ImportError:  # Python < 3.3
    def shlex_quote(s):
        """Wrap s in single quotes, escaping any single quote it contains."""
        escaped = s.replace("'", "'\"'\"'")
        return "'" + escaped + "'"
200
201
b31756c1
FV
def compat_ord(c):
    """Return c unchanged if it is exactly an int, otherwise ord(c)."""
    return c if type(c) is int else ord(c)
205
468e2e92
FV
# The type of a compiled regular expression; this is not clearly defined
# otherwise, so it is taken from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))
208
# Default HTTP request headers
std_headers = dict([
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)'),
    ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate'),
    ('Accept-Language', 'en-us,en;q=0.5'),
])
f427df17 216
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks: if the locale
    lookup fails, or the reported encoding cannot be used to encode text,
    'UTF-8' is returned instead.
    """
    try:
        pref = locale.getpreferredencoding()
        # Make sure the reported codec actually exists and works
        u'TEST'.encode(pref)
    except Exception:
        # Deliberately broad (any locale/codec failure falls back), but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        pref = 'UTF-8'

    return pref
d77c3dfd 230
if sys.version_info >= (3, 0):
    def compat_print(s):
        """Print the unicode string s."""
        assert type(s) == type(u'')
        print(s)
else:
    def compat_print(s):
        """Print s encoded with the preferred encoding.

        Unencodable characters are written as XML character references.
        """
        encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
        print(encoded)
d77c3dfd 238
f4bfd65f 239
181c8655
PH
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically """

    # The data goes to a temporary file next to the target first and is
    # renamed onto fn only after it has been written completely.
    tmp_options = dict(
        suffix='.tmp',
        prefix=os.path.basename(fn) + '.',
        dir=os.path.dirname(fn),
        delete=False,
    )

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**tmp_options)

    try:
        with tf:
            json.dump(obj, tf)
        os.rename(tf.name, fn)
    except:
        # Best-effort removal of the temporary file; the original error is
        # always re-raised.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
272
273
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        # key and val are interpolated into the expression, so they are
        # restricted to a harmless character set.
        assert re.match(r'^[a-zA-Z-]+$', key)
        assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        predicate = u"[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val):
        """ Return the first element matching xpath whose attribute key equals val """
        for candidate in node.findall(xpath):
            if candidate.attrib.get(key) == val:
                return candidate
        return None
287
d7e66d39
JMF
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path to '{uri}tag' using ns_map."""
    expanded = []
    for pieces in [step.split(':') for step in path.split('/')]:
        if len(pieces) == 1:
            # Step without a namespace prefix: keep as is
            expanded.append(pieces[0])
            continue
        prefix, tag = pieces
        expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
300
d77c3dfd 301
# Backport of a bugfix: replace the html parser module's start-tag pattern.
compat_html_parser.locatestarttagend = re.compile(
    r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""",
    re.VERBOSE)
a921f407
JMF
class BaseHTMLParser(compat_html_parser.HTMLParser):
    """HTMLParser that remembers the document it was asked to parse."""

    def __init__(self):
        # This was misspelled `__init`, which Python name-mangles into a
        # private method that nothing calls, so self.html was never
        # initialised at construction time.
        compat_html_parser.HTMLParser.__init__(self)
        # The document most recently passed to loads(), or None
        self.html = None

    def loads(self, html):
        """Store html on the parser, then parse it completely."""
        self.html = html
        self.feed(html)
        self.close()
312
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""

    def __init__(self, attribute, value):
        self.attribute = attribute
        self.value = value
        # [tag, start position, end position] once the element was found
        self.result = None
        self.started = False
        # Number of currently open elements, per tag name
        self.depth = {}
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Skip a line and resume parsing; give up after repeated errors
        or once the wanted element has been entered."""
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        current_line = self.getpos()[0]
        remaining_lines = self.html.split('\n')[current_line:]  # skip one line
        self.rawdata = '\n'.join(remaining_lines)
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            self.depth[tag] = self.depth.get(tag, 0) + 1

    def handle_endtag(self, tag):
        if not self.started:
            return
        if tag in self.depth:
            self.depth[tag] -= 1
        if self.depth[self.result[0]] == 0:
            # The element that matched has been closed
            self.started = False
            self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    handle_entityref = handle_charref = handle_data = handle_comment = \
        handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the text between the recorded start and end positions,
        or None if no complete element was recorded."""
        if self.result is None or len(self.result) != 3:
            return None
        start_line, start_col = self.result[1][0], self.result[1][1]
        end_line, end_col = self.result[2][0], self.result[2][1]
        lines = self.html.split('\n')[start_line - 1:end_line]
        lines[0] = lines[0][start_col:]
        if len(lines) == 1:
            lines[-1] = lines[-1][:end_col - start_col]
        lines[-1] = lines[-1][:end_col]
        return '\n'.join(lines).strip()
3b024e17
PH
# Hack for https://github.com/rg3/youtube-dl/issues/662
if sys.version_info < (2, 7, 3):
    def _attrparser_parse_endtag(self, i):
        """Step over a literal "</scr'+'ipt>" at position i; otherwise
        defer to the stock HTMLParser.parse_endtag."""
        split_tag = "</scr'+'ipt>"
        if self.rawdata[i:].startswith(split_tag):
            return i + len(split_tag)
        return compat_html_parser.HTMLParser.parse_endtag(self, i)

    AttrParser.parse_endtag = _attrparser_parse_endtag
9e6dd238
FV
378
def get_element_by_id(id, html):
    """Return the content of the tag whose "id" attribute equals id in the passed HTML document"""
    attribute = "id"
    return get_element_by_attribute(attribute, id, html)
382
def get_element_by_attribute(attribute, value, html):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    attr_parser = AttrParser(attribute, value)
    try:
        attr_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Parse errors are ignored; whatever was collected so far is used
        pass
    return attr_parser.get_result()
9e6dd238 391
a921f407
JMF
class MetaParser(BaseHTMLParser):
    """
    Modified HTMLParser that isolates a meta tag with the specified name
    attribute.
    """

    def __init__(self, name):
        BaseHTMLParser.__init__(self)
        self.name = name
        self.content = None
        # "content" attribute of the last matching meta tag seen, if any
        self.result = None

    def handle_starttag(self, tag, attrs):
        if tag == 'meta':
            attr_map = dict(attrs)
            if attr_map.get('name') == self.name:
                self.result = attr_map.get('content')

    def get_result(self):
        """Return the recorded content attribute, or None."""
        return self.result
412
def get_meta_content(name, html):
    """
    Return the content attribute from the meta tag with the given name attribute.
    """
    meta_parser = MetaParser(name)
    try:
        meta_parser.loads(html)
    except compat_html_parser.HTMLParseError:
        # Parse errors are ignored; whatever was found so far is used
        pass
    return meta_parser.get_result()
423
9e6dd238
FV
424
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    # Source newlines become spaces; only markup creates line breaks
    text = html.replace('\n', ' ')
    substitutions = (
        # <br> and </p><p> boundaries become newlines
        (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    text = unescapeHTML(text)
    return text.strip()
9e6dd238
FV
436
437
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename u'-' selects standard output instead of a file.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == u'-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars.
        # Two defects are fixed here: os.path.join() was handed a generator
        # instead of the path parts, and the retry opened the original
        # filename again instead of the sanitized one.
        # Only the final component is rewritten: the pattern also matches
        # path separators and the drive colon, so applying it to the
        # directory part would point the retry at a different location.
        head, tail = os.path.split(filename)
        alt_filename = os.path.join(
            head, re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', tail))
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
471
472
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable date
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 480
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
    """
    def _replacement_for(ch):
        code = ord(ch)
        # Question marks and control characters are dropped
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (ch in '!&\'()[]{}$;`^,#' or ch.isspace() or code > 127):
            return '_'
        return ch

    result = u''.join(_replacement_for(ch) for ch in s)
    if is_id:
        return result
    # Collapse runs of underscores, then trim them from both ends
    result = re.sub(u'_+', u'_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    return result if result else '_'
d77c3dfd
FV
512
def orderedSet(iterable):
    """ Return a list of the distinct elements of iterable, in order of first appearance """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 520
912b38b4 521
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities and decimal or
    hexadecimal character references are decoded; anything else is returned
    in its literal '&...;' form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # The previous pattern only allowed 0-9 after 'x' and was not anchored
    # at the end, so '#x2F' was decoded from its prefix '#x2' alone.
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)$', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith(u'x'):
            base = 16
            # int() accepts the '0x...' spelling for base 16
            numstr = u'0%s' % numstr
        else:
            base = 10
        return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
540
541
def unescapeHTML(s):
    """Replace every '&...;' sequence in s using _htmlentity_transform.

    None is passed through unchanged; any other input must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^;]+);', _decode, s)
d77c3dfd 549
8bf48f23
PH
550
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the result is meant for a subprocess call
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Windows 2000 and up have Unicode APIs.
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if unicode_windows and not for_subprocess:
        # Pass u'' directly to use the Unicode APIs
        return s

    if unicode_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return s.encode(encoding, 'ignore')
577
f07b74fc
PH
578
def encodeArgument(s):
    """Encode s with encodeFilename(..., True); non-text input is first decoded as ASCII."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
586
587
8271226a
PH
def decodeOption(optval):
    """Return optval as text, decoding bytes with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 596
4539dd30
PH
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The shortest form that fits is used. The boundaries are inclusive:
    exactly one hour gives '1:00:00' and exactly one minute gives '1:00'
    (previously these came out as '60:00' and '60').
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
604
a0ddb8a2
PH
605
def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
    """Create the HTTPS handler for the running Python version.

    opts_no_check_certificate -- if true, server certificates are not
                                 verified (Python >= 3.2 branch only)
    kwargs                    -- passed on to the handler constructor
    """
    if sys.version_info < (3, 2):
        import httplib

        class HTTPSConnectionV3(httplib.HTTPSConnection):
            def __init__(self, *args, **kwargs):
                httplib.HTTPSConnection.__init__(self, *args, **kwargs)

            def connect(self):
                sock = socket.create_connection((self.host, self.port), self.timeout)
                if getattr(self, '_tunnel_host', False):
                    self.sock = sock
                    self._tunnel()
                # Try SSLv3 first and fall back to protocol negotiation
                try:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
                except ssl.SSLError:
                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)

        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
            def https_open(self, req):
                return self.do_open(HTTPSConnectionV3, req)
        return HTTPSHandlerV3(**kwargs)
    else:
        # Negotiate the protocol version instead of pinning SSLv3: SSLv3 is
        # obsolete and insecure, and ssl.PROTOCOL_SSLv3 does not exist at
        # all when OpenSSL was built without it.
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        try:
            context.load_default_certs()
        except AttributeError:
            pass  # Python < 3.4
        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
ea6d901e 639
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
        active_exc_type = sys.exc_info()[0]
        # Errors raised while one of these exceptions is being handled are
        # always treated as expected.
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if not expected:
            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        if self.traceback is None:
            return None
        formatted_lines = traceback.format_tb(self.traceback)
        return u''.join(formatted_lines)
664
1c256f70 665
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
669
670
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        Exception.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
682
683
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
691
692
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        # Hand the message to Exception as well, so that str(exc) and
        # tracebacks show it instead of an empty string.
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd
FV
701
class MaxDownloadsReached(Exception):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
705
706
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
714
715
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        # Give the exception a message, so that str(exc) and tracebacks are
        # not empty.
        super(ContentTooShortError, self).__init__(
            'Downloaded %s bytes, expected %s bytes' % (downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 730
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress data as a raw deflate stream, or as zlib data if that fails."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Build an addinfourl response carrying the given status code."""
        addinfourl = compat_urllib_request.addinfourl
        if hasattr(addinfourl, 'getcode'):
            return addinfourl(stream, headers, url, code)
        # No getcode(): build without the code and attach it afterwards
        response = addinfourl(stream, headers, url)
        response.code = code
        return response

    def http_request(self, req):
        """Add the standard headers and apply the Youtubedl-* pseudo headers."""
        for header_name, header_value in std_headers.items():
            if header_name not in req.headers:
                req.add_header(header_name, header_value)
        if 'Youtubedl-no-compression' in req.headers:
            req.headers.pop('Accept-encoding', None)
            del req.headers['Youtubedl-no-compression']
        if 'Youtubedl-user-agent' in req.headers:
            req.headers.pop('User-agent', None)
            req.headers['User-agent'] = req.headers.pop('Youtubedl-user-agent')
        return req

    def http_response(self, req, resp):
        """Return resp, replaced by a decompressed copy if it is gzip or deflate encoded."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for trim in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    https_request = http_request
    https_response = http_response
bf50b038 810
5de90176 811
def parse_iso8601(date_str, delimiter='T'):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # A trailing 'Z' or a '+hh:mm' / '-hhmm' style offset
    tz_match = re.search(
        r'Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$',
        date_str)
    utc_offset = datetime.timedelta()
    if tz_match:
        # Cut the timezone designator off before parsing the rest
        date_str = date_str[:-len(tz_match.group(0))]
        if tz_match.group('sign'):
            direction = 1 if tz_match.group('sign') == '+' else -1
            utc_offset = datetime.timedelta(
                hours=direction * int(tz_match.group('hours')),
                minutes=direction * int(tz_match.group('minutes')))
    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    utc_time = datetime.datetime.strptime(date_str, date_format) - utc_offset
    return calendar.timegm(utc_time.timetuple())
835
836
bf50b038
JMF
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    cleaned = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', cleaned)
    known_formats = (
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%b %dst %Y %I:%M%p',
        '%b %dnd %Y %I:%M%p',
        '%b %dth %Y %I:%M%p',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%d.%m.%Y',
        '%d/%m/%Y',
        '%d/%m/%y',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    )
    upload_date = None
    # Every format is tried; the last one that parses determines the result
    for date_format in known_formats:
        try:
            parsed = datetime.datetime.strptime(cleaned, date_format)
        except ValueError:
            continue
        upload_date = parsed.strftime('%Y%m%d')
    if upload_date is None:
        # Last resort: the email date parser
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    return upload_date
882
def determine_ext(url, default_ext=u'unknown_video'):
    """Guess the file extension from url; return default_ext if there is no plausible one."""
    if url is None:
        return default_ext
    # Text after the last dot of the part before the query string
    without_query = url.partition(u'?')[0]
    candidate = without_query.rpartition(u'.')[2]
    if not re.match(r'^[A-Za-z0-9]+$', candidate):
        return default_ext
    return candidate
73e79f2a 891
d4051a8e
JMF
def subtitles_filename(filename, sub_lang, sub_format):
    """Return filename with its last extension replaced by '<sub_lang>.<sub_format>'."""
    stem = filename.rsplit('.', 1)[0]
    return stem + u'.' + sub_lang + u'.' + sub_format
894
def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    """
    today = datetime.date.today()
    if date_str == 'now' or date_str == 'today':
        return today
    # Raw string: '\d' in a plain string literal is an invalid escape
    # sequence, which newer Python versions warn about.
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta takes plural keyword names ('days', 'weeks')
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()
920
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.
    Any other string is returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
929
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str.

        A missing start or end leaves that side of the range open.
        """
        self.start = (datetime.datetime.min.date() if start is None
                      else date_from_str(start))
        self.end = (datetime.datetime.max.date() if end is None
                    else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
955
956
def platform_name():
    """ Return platform.platform() as a compat_str, decoding it with the
    preferred encoding when the platform module hands back bytes """
    name = platform.platform()
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
965
966
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The special method is the Win32 WriteConsoleW call, which is only used
    when out is file descriptor 1 or 2 and the corresponding standard
    handle is an actual console.  Raises OSError if WriteConsoleW reports
    failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (Win32: STD_OUTPUT_HANDLE is -11, STD_ERROR_HANDLE is -12)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Build typed prototypes for the kernel32 functions used below
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    # NOTE(review): the argument is declared as DWORD here although the
    # other prototypes use HANDLE for handles - confirm this is intended
    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, for handles whose file type (ignoring
        # the REMOTE flag) is not a character device, and for handles on
        # which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters per call, stopping before the next
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character; ask for 2
        # units so that this single character is written on its own
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Drop only what was actually written; the loop retries the rest
            s = s[written.value:]
    return True
1037
1038
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr when out is None) and flush.

    On Windows, when no encoding is forced and out has a fileno, the
    console API (_windows_write_string) is tried first.  Otherwise s is
    encoded (unencodable characters are dropped) for binary streams and
    for streams exposing a .buffer, and written as text to anything else.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    try_console = (
        sys.platform == 'win32' and encoding is None
        and hasattr(stream, 'fileno'))
    if try_console and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr, so always write bytes there
    wants_bytes = (
        'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3)
    if wants_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        codec = (
            encoding or getattr(stream, 'encoding', None)
            or preferredencoding())
        stream.buffer.write(s.encode(codec, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
1059
1060
48ea9cea
PH
def bytes_to_intlist(bs):
    """Convert a byte string to a list of integer byte values."""
    if not bs:
        return []
    # Indexing bytes yields ints on Python 3 and 1-char strings on Python 2
    if not isinstance(bs[0], int):
        return [ord(char) for char in bs]
    return list(bs)
1068
c257baff 1069
def intlist_to_bytes(xs):
    """Convert a sequence of integer byte values to a byte string."""
    if not xs:
        return b''
    if not isinstance(chr(0), bytes):  # Python 3
        return bytes(xs)
    # Python 2: chr() already produces one-byte strings
    return ''.join([chr(value) for value in xs])
c38b1e77
PH
1077
1078
def get_cachedir(params=None):
    """Return the cache directory to use.

    params is an optional mapping; its 'cachedir' entry wins when present.
    Otherwise the result is '<cache root>/youtube-dl', where the cache root
    is $XDG_CACHE_HOME, or ~/.cache when that variable is unset or empty
    (an empty value would otherwise produce a relative path).
    """
    if params is None:
        params = {}
    cache_root = (
        os.environ.get('XDG_CACHE_HOME')
        or os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
c1c9a79c
PH
1083
1084
1085# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx through ctypes on Windows, fcntl.lockf
# everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # ctypes mirror of the structure passed as the last argument of
    # LockFileEx / UnlockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high halves of the byte count handed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f, starting at offset 0; raise OSError on
        failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so that _unlock_file can pass the same
        # structure back
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock (Win32
        # LOCKFILE_EXCLUSIVE_LOCK); 0x0 requests a shared one
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file on f; raise OSError on
        failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Take an exclusive or shared fcntl.lockf lock on f."""
        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the fcntl.lockf lock held on f."""
        fcntl.lockf(f, fcntl.LOCK_UN)
1148
1149
class locked_file(object):
    """Context manager around a file opened with io.open.

    Entering locks the file via _lock_file (shared for mode 'r', exclusive
    for 'a' and 'w'); leaving unlocks it via _unlock_file and closes it.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.mode = mode
        self.f = io.open(filename, mode, encoding=encoding)

    def __enter__(self):
        try:
            # Only pure readers may share the lock
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the handle when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        """Forward to the underlying file's write()"""
        return self.f.write(*args)

    def read(self, *args):
        """Forward to the underlying file's read()"""
        return self.f.read(*args)
4eb7f1d1
JMF
1179
1180
def shell_quote(args):
    """Return the arguments as one shell-escaped command line string.

    args is an iterable of text or byte strings; byte strings are decoded
    with the filesystem encoding (UTF-8 if that is unknown).
    """
    # pipes.quote is undocumented and the pipes module is gone in
    # Python 3.13; shlex.quote is the documented equivalent where available
    try:
        from shlex import quote
    except ImportError:  # Python 2
        from pipes import quote

    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    quoted_args = []
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(quote(a))
    return u' '.join(quoted_args)
9d4660ca
PH
1192
1193
f4d96df0
PH
def takewhile_inclusive(pred, seq):
    """ Yield elements of seq up to and including the first one for which
    pred is false (itertools.takewhile would drop that element) """
    for element in seq:
        yield element
        if pred(element):
            continue
        break
1201
1202
9d4660ca
PH
def smuggle_url(url, data):
    """ Append JSON-encoded data to url as a '#__youtubedl_smuggle=...'
    fragment, for internal use. """
    payload = {u'__youtubedl_smuggle': json.dumps(data)}
    fragment = compat_urllib_parse.urlencode(payload)
    return url + u'#' + fragment
1209
1210
def unsmuggle_url(smug_url, default=None):
    """ Split a URL produced by smuggle_url into (url, data).

    URLs without the '#__youtubedl_smuggle' marker are returned untouched
    together with default. """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    pieces = smug_url.rpartition(u'#')
    fields = compat_parse_qs(pieces[2])
    payload = fields[u'__youtubedl_smuggle'][0]
    return pieces[0], json.loads(payload)
02dbf93f
PH
1218
1219
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count with a binary-prefixed unit, e.g. u'1.50KiB'.

    bytes may be a number, a str holding a number, or None (which gives
    u'N/A').  Raises ValueError for negative values (math.log).
    """
    SUFFIXES = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB']
    if bytes is None:
        return u'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the index inside the table: values below 1 would otherwise
        # select u'YiB' through a negative index, and values of 1024**9
        # and above would raise IndexError
        exponent = max(0, min(exponent, len(SUFFIXES) - 1))
    suffix = SUFFIXES[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return u'%.2f%s' % (converted, suffix)
f53c966a 1232
1c088fa8 1233
1c088fa8
PH
def get_term_width():
    """Return the terminal width in columns, or None if it is unknown.

    A numeric COLUMNS environment variable takes precedence; otherwise the
    output of `stty size` is consulted.
    """
    columns = os.environ.get('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            pass  # Garbage in COLUMNS; ask the terminal instead

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, _ = sp.communicate()
        return int(out.split()[1])
    except (OSError, ValueError, IndexError):
        # stty is missing, is not attached to a terminal, or printed
        # something unexpected.  Deliberately best-effort, but no longer
        # swallows KeyboardInterrupt / SystemExit.
        return None
caefb1de
PH
1248
1249
def month_by_name(name):
    """ Map a full English month name to its number (1-12), independently
    of the current locale; unknown names give None """

    ENGLISH_NAMES = (
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December')
    try:
        position = ENGLISH_NAMES.index(name)
    except ValueError:
        return None
    return position + 1
18258362
JMF
1260
1261
def fix_xml_ampersands(xml_str):
    """Escape as '&amp;' every '&' that does not already start one of the
    predefined XML entities or a numeric character reference"""
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub(u'&amp;', xml_str)
e3946f98
PH
1268
1269
def setproctitle(title):
    """Best-effort attempt to set the process name through libc's prctl.

    title must be a compat_str.  Silently does nothing when libc.so.6
    cannot be loaded or the library has no prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes allocates len + 1 bytes and
    # appends the NUL terminator.  A buffer of exactly len(title_bytes)
    # bytes stores no terminator, leaving prctl to read past its end.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1283
1284
def remove_start(s, start):
    """Return s without the prefix start; s is returned unchanged when it
    does not begin with start."""
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1289
1290
2b9faf55
PH
def remove_end(s, end):
    """Return s without the suffix end; s is returned unchanged when it
    does not finish with end (or when end is empty)."""
    # The emptiness check matters: every string ends with '', and
    # s[:-0] is s[:0], which would wipe out the whole string.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1295
1296
def url_basename(url):
    """Return the last segment of the URL's path component (surrounding
    slashes are ignored; query and fragment are not part of the path)."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip(u'/')
    return trimmed_path.rpartition(u'/')[2]
aa94a6d3
PH
1300
1301
class HEADRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports HEAD"""

    def get_method(self):
        method = "HEAD"
        return method
7217e148
PH
1305
1306
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, or return default when there is no value.

    If get_attr is given, the attribute of that name is read from v first
    (a missing attribute counts as no value).  None and '' both count as
    no value.  The result is int(v) * invscale // scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1314
9572013d 1315
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
1318
9732d77e
PH
1319
def str_to_int(int_str):
    """Parse an integer written with ',' or '.' as digit-group separators;
    None is passed through."""
    if int_str is None:
        return None
    digits = re.sub(r'[,\.]', u'', int_str)
    return int(digits)
608d11f5
PH
1325
1326
9732d77e
PH
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or default when v is None."""
    if v is None:
        return default
    return float(v) * invscale / scale
43f775e4
PH
1329
1330
608d11f5
PH
def parse_duration(s):
    """Parse a duration such as '1:02:03', '3 min 5 sec' or '12.5s' into a
    number of seconds.

    Returns None for None and for strings that do not match; the result is
    an int unless a fractional part is present.
    """
    if s is None:
        return None

    m = re.match(
        r'(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$',
        s.strip())
    if not m:
        return None

    total = int(m.group('secs'))
    for group_name, seconds_per_unit in (('mins', 60), ('hours', 60 * 60)):
        amount = m.group(group_name)
        if amount:
            total += int(amount) * seconds_per_unit
    # The 'ms' group holds the fractional part including its leading dot
    fraction = m.group('ms')
    if fraction:
        total += float(fraction)
    return total
91d7d0b3
JMF
1349
1350
def prepend_extension(filename, ext):
    """Insert ext in front of filename's own extension:
    ('video.mp4', 'temp') gives 'video.temp.mp4'."""
    parts = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(parts[0], ext, parts[1])
d70ad093
PH
1354
1355
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).

    The binary is actually run once with those arguments (its output is
    discarded); False is returned when starting it raises OSError. """
    # None instead of a shared mutable [] default; list() also lets
    # callers pass a tuple
    extra_args = [] if args is None else list(args)
    try:
        subprocess.Popen(
            [exe] + extra_args,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1364
1365
class PagedList(object):
    """List-like view over results that are fetched one page at a time.

    pagefunc(pagenum) must return an iterable with the entries of the given
    zero-based page; every page except the last one is expected to hold
    exactly pagesize entries.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getslice(self, start=0, end=None):
        """Return the entries with index in [start, end) as a list,
        fetching only the pages that are needed (end=None means up to the
        last entry)."""
        # An empty or inverted range holds nothing.  Without this guard,
        # e.g. getslice(0, 0) computes a full-page slice and keeps fetching.
        if end is not None and end <= start:
            return []

        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            # Skip pages that lie entirely before start
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of start inside this page (0 unless start is on it)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry, if end is on this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1411
1412
def uppercase_escape(s):
    """Replace every literal '\\UXXXXXXXX' escape (8 hex digits) in s by
    the character it denotes."""
    decode_escape = codecs.getdecoder('unicode_escape')

    def expand(match):
        decoded, _ = decode_escape(match.group(0))
        return decoded

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
b53466e1
PH
1419
# struct_pack / struct_unpack: struct.pack / struct.unpack that accept a text
# format specification on every supported Python version.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(fmt, *args)

    def struct_unpack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(fmt, *args)
else:
    # Text specifications work natively; use the originals directly
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1436
1437
def read_batch_urls(batch_fd):
    """Read URLs from an open batch file, one per line, and close it.

    Lines may be text or UTF-8 bytes.  A leading byte order mark is
    removed, surrounding whitespace is stripped, and empty lines as well
    as lines starting with '#', ';' or ']' are skipped.
    Returns the list of remaining URLs.
    """
    # The BOM shows up as u'\ufeff' when the line was decoded as UTF-8 and
    # as u'\xef\xbb\xbf' when its three bytes were decoded one by one
    BOMS = (u'\xef\xbb\xbf', u'\ufeff')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1452
1453
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments like urlencode() does and return the
    result as ASCII bytes, ready to be used as a request body."""
    encoded_text = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded_text.encode('ascii')
bcf89ce6
PH
1456
1457
0990305d
PH
# etree_iter(node): iterate over an element tree, using Element.iter where
# the running Python provides it.
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1462
1463
bcf89ce6
PH
def parse_xml(s):
    """Parse the XML document in the text string s and return its root
    element.  Doctype declarations are ignored; on Python 2 element texts
    are normalised to unicode."""
    etree = xml.etree.ElementTree

    class TreeBuilder(etree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = etree.XMLParser(target=TreeBuilder())
    kwargs = {}
    if sys.version_info >= (2, 7):
        kwargs['parser'] = parser
    tree = etree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            text = n.text
            if text is not None and not isinstance(text, compat_str):
                n.text = text.decode('utf-8')
    return tree
e68301af
PH
1479
1480
# compat_getpass: getpass.getpass, except on Python 2 under Windows where a
# text prompt is first encoded with the preferred encoding.
if sys.version_info >= (3, 0) or sys.platform != 'win32':
    compat_getpass = getpass.getpass
else:
    def compat_getpass(prompt, *args, **kwargs):
        encoded_prompt = (
            prompt.encode(preferredencoding())
            if isinstance(prompt, compat_str) else prompt)
        return getpass.getpass(encoded_prompt, *args, **kwargs)
a1a530b0
PH
1488
1489
# Maps US film rating labels to integers.
# NOTE(review): the numbers look like minimum viewer ages - confirm against
# the callers before relying on that reading.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
1497
1498
def strip_jsonp(code):
    """Unwrap a JSONP response 'callback(<payload>);' and return the
    payload; input that does not have this shape is returned unchanged."""
    jsonp_wrapper = re.compile(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$')
    return jsonp_wrapper.sub(r'\1', code)
478c2c61
PH
1501
1502
e05f6939
PH
def js_to_json(code):
    """Convert simple JavaScript object literals to JSON.

    Keys that follow '{' or ',' are wrapped in double quotes, single-quoted
    keys and values are re-quoted with double quotes, and a comma directly
    in front of a closing ']' is dropped.
    """
    def requote(token):
        # 'text' -> "text"; tokens containing a double quote are not handled
        assert token.endswith("'")
        assert '"' not in token
        return '"%s"' % token[1:-1]

    def fix_kv(m):
        prefix, key, separator, value = m.group(1, 2, 3, 4)
        if key.startswith("'"):
            key = requote(key)
        elif not key.startswith('"'):
            key = '"%s"' % key

        if value.startswith("'"):
            value = requote(value)

        return prefix + key + separator + value

    res = re.sub(r'''(?x)
            ([{,]\s*)
            ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+)
            (:\s*)
            ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|\[|\{)
        ''', fix_kv, code)
    # Trailing commas inside arrays are not valid JSON
    return re.sub(r',(\s*\])', r'\1', res)
1529
1530
478c2c61
PH
def qualities(quality_ids):
    """ Build a function that ranks a quality id by its position in
    quality_ids; ids that are not listed rank as -1 """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
1539
acd69589
PH
1540
# %-style template with the named fields title, id and ext.
# NOTE(review): presumably the default output file name template - confirm
# against the code that consumes it.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68
PH
1542
# subprocess_check_output: subprocess.check_output, with a minimal
# replacement for Python versions that do not provide it.
try:
    subprocess_check_output = subprocess.check_output
except AttributeError:
    def subprocess_check_output(*args, **kwargs):
        """Run a command and return what it wrote to stdout.

        Accepts the arguments of subprocess.Popen (except stdout) and
        raises subprocess.CalledProcessError, carrying the captured output
        in .output, when the command exits with a non-zero status.
        """
        assert 'input' not in kwargs
        p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
        output, _ = p.communicate()
        ret = p.poll()
        if ret:
            # The interpreters that need this fallback have neither
            # Popen.args nor the output= argument of CalledProcessError,
            # so recover the command from our own arguments and attach
            # the output afterwards.
            cmd = kwargs.get('args')
            if cmd is None:
                cmd = args[0]
            error = subprocess.CalledProcessError(ret, cmd)
            error.output = output
            raise error
        return output