youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import unicode_literals
   5
   6 import calendar
   7 import codecs
   8 import contextlib
   9 import ctypes
  10 import datetime
  11 import email.utils
  12 import errno
  13 import gzip
  14 import itertools
  15 import io
  16 import json
  17 import locale
  18 import math
  19 import os
  20 import pipes
  21 import platform
  22 import re
  23 import ssl
  24 import socket
  25 import struct
  26 import subprocess
  27 import sys
  28 import tempfile
  29 import traceback
  30 import xml.etree.ElementTree
  31 import zlib
  32
  33 from .compat import (
  34     compat_chr,
  35     compat_getenv,
  36     compat_html_entities,
  37     compat_parse_qs,
  38     compat_str,
  39     compat_urllib_error,
  40     compat_urllib_parse,
  41     compat_urllib_parse_urlparse,
  42     compat_urllib_request,
  43     compat_urlparse,
  44 )
  45
  46
# This is not clearly defined otherwise
# (the type of compiled pattern objects, obtained from an empty pattern)
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers. YoutubeDLHandler.http_request iterates over
# this mapping and adds the entries to outgoing requests.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
  57
  58 def preferredencoding():
  59     """Get preferred encoding.
  60
  61     Returns the best encoding scheme for the system, based on
  62     locale.getpreferredencoding() and some further tweaks.
  63     """
  64     try:
  65         pref = locale.getpreferredencoding()
  66         'TEST'.encode(pref)
  67     except:
  68         pref = 'UTF-8'
  69
  70     return pref
  71
  72
  73 def write_json_file(obj, fn):
  74     """ Encode obj as JSON and write it to fn, atomically if possible """
  75
  76     fn = encodeFilename(fn)
  77     if sys.version_info < (3, 0) and sys.platform != 'win32':
  78         encoding = get_filesystem_encoding()
  79         # os.path.basename returns a bytes object, but NamedTemporaryFile
  80         # will fail if the filename contains non ascii characters unless we
  81         # use a unicode object
  82         path_basename = lambda f: os.path.basename(fn).decode(encoding)
  83         # the same for os.path.dirname
  84         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
  85     else:
  86         path_basename = os.path.basename
  87         path_dirname = os.path.dirname
  88
  89     args = {
  90         'suffix': '.tmp',
  91         'prefix': path_basename(fn) + '.',
  92         'dir': path_dirname(fn),
  93         'delete': False,
  94     }
  95
  96     # In Python 2.x, json.dump expects a bytestream.
  97     # In Python 3.x, it writes to a character stream
  98     if sys.version_info < (3, 0):
  99         args['mode'] = 'wb'
 100     else:
 101         args.update({
 102             'mode': 'w',
 103             'encoding': 'utf-8',
 104         })
 105
 106     tf = tempfile.NamedTemporaryFile(**args)
 107
 108     try:
 109         with tf:
 110             json.dump(obj, tf)
 111         if sys.platform == 'win32':
 112             # Need to remove existing file on Windows, else os.rename raises
 113             # WindowsError or FileExistsError.
 114             try:
 115                 os.unlink(fn)
 116             except OSError:
 117                 pass
 118         os.rename(tf.name, fn)
 119     except:
 120         try:
 121             os.remove(tf.name)
 122         except OSError:
 123             pass
 124         raise
 125
 126
 127 if sys.version_info >= (2, 7):
 128     def find_xpath_attr(node, xpath, key, val):
 129         """ Find the xpath xpath[@key=val] """
 130         assert re.match(r'^[a-zA-Z-]+$', key)
 131         assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
 132         expr = xpath + u"[@%s='%s']" % (key, val)
 133         return node.find(expr)
 134 else:
 135     def find_xpath_attr(node, xpath, key, val):
 136         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
 137         # .//node does not match if a node is a direct child of . !
 138         if isinstance(xpath, unicode):
 139             xpath = xpath.encode('ascii')
 140
 141         for f in node.findall(xpath):
 142             if f.attrib.get(key) == val:
 143                 return f
 144         return None
 145
 146 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 147 # the namespace parameter
 148 def xpath_with_ns(path, ns_map):
 149     components = [c.split(':') for c in path.split('/')]
 150     replaced = []
 151     for c in components:
 152         if len(c) == 1:
 153             replaced.append(c[0])
 154         else:
 155             ns, tag = c
 156             replaced.append('{%s}%s' % (ns_map[ns], tag))
 157     return '/'.join(replaced)
 158
 159
 160 def xpath_text(node, xpath, name=None, fatal=False):
 161     if sys.version_info < (2, 7):  # Crazy 2.6
 162         xpath = xpath.encode('ascii')
 163
 164     n = node.find(xpath)
 165     if n is None:
 166         if fatal:
 167             name = xpath if name is None else name
 168             raise ExtractorError('Could not find XML element %s' % name)
 169         else:
 170             return None
 171     return n.text
 172
 173
 174 def get_element_by_id(id, html):
 175     """Return the content of the tag with the specified ID in the passed HTML document"""
 176     return get_element_by_attribute("id", id, html)
 177
 178
 179 def get_element_by_attribute(attribute, value, html):
 180     """Return the content of the tag with the specified attribute in the passed HTML document"""
 181
 182     m = re.search(r'''(?xs)
 183         <([a-zA-Z0-9:._-]+)
 184          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 185          \s+%s=['"]?%s['"]?
 186          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 187         \s*>
 188         (?P<content>.*?)
 189         </\1>
 190     ''' % (re.escape(attribute), re.escape(value)), html)
 191
 192     if not m:
 193         return None
 194     res = m.group('content')
 195
 196     if res.startswith('"') or res.startswith("'"):
 197         res = res[1:-1]
 198
 199     return unescapeHTML(res)
 200
 201
 202 def clean_html(html):
 203     """Clean an HTML snippet into a readable string"""
 204     # Newline vs <br />
 205     html = html.replace('\n', ' ')
 206     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 207     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 208     # Strip html tags
 209     html = re.sub('<.*?>', '', html)
 210     # Replace html entities
 211     html = unescapeHTML(html)
 212     return html.strip()
 213
 214
 215 def sanitize_open(filename, open_mode):
 216     """Try to open the given filename, and slightly tweak it if this fails.
 217
 218     Attempts to open the given filename. If this fails, it tries to change
 219     the filename slightly, step by step, until it's either able to open it
 220     or it fails and raises a final exception, like the standard open()
 221     function.
 222
 223     It returns the tuple (stream, definitive_file_name).
 224     """
 225     try:
 226         if filename == '-':
 227             if sys.platform == 'win32':
 228                 import msvcrt
 229                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 230             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 231         stream = open(encodeFilename(filename), open_mode)
 232         return (stream, filename)
 233     except (IOError, OSError) as err:
 234         if err.errno in (errno.EACCES,):
 235             raise
 236
 237         # In case of error, try to remove win32 forbidden chars
 238         alt_filename = os.path.join(
 239                         re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
 240                         for path_part in os.path.split(filename)
 241                        )
 242         if alt_filename == filename:
 243             raise
 244         else:
 245             # An exception here should be caught in the caller
 246             stream = open(encodeFilename(filename), open_mode)
 247             return (stream, alt_filename)
 248
 249
 250 def timeconvert(timestr):
 251     """Convert RFC 2822 defined time string into system timestamp"""
 252     timestamp = None
 253     timetuple = email.utils.parsedate_tz(timestr)
 254     if timetuple is not None:
 255         timestamp = email.utils.mktime_tz(timetuple)
 256     return timestamp
 257
 258 def sanitize_filename(s, restricted=False, is_id=False):
 259     """Sanitizes a string so it could be used as part of a filename.
 260     If restricted is set, use a stricter subset of allowed characters.
 261     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 262     """
 263     def replace_insane(char):
 264         if char == '?' or ord(char) < 32 or ord(char) == 127:
 265             return ''
 266         elif char == '"':
 267             return '' if restricted else '\''
 268         elif char == ':':
 269             return '_-' if restricted else ' -'
 270         elif char in '\\/|*<>':
 271             return '_'
 272         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 273             return '_'
 274         if restricted and ord(char) > 127:
 275             return '_'
 276         return char
 277
 278     result = ''.join(map(replace_insane, s))
 279     if not is_id:
 280         while '__' in result:
 281             result = result.replace('__', '_')
 282         result = result.strip('_')
 283         # Common case of "Foreign band name - English song title"
 284         if restricted and result.startswith('-_'):
 285             result = result[2:]
 286         if not result:
 287             result = '_'
 288     return result
 289
 290 def orderedSet(iterable):
 291     """ Remove all duplicates from the input iterable """
 292     res = []
 293     for el in iterable:
 294         if el not in res:
 295             res.append(el)
 296     return res
 297
 298
 299 def _htmlentity_transform(entity):
 300     """Transforms an HTML entity to a character."""
 301     # Known non-numeric HTML entity
 302     if entity in compat_html_entities.name2codepoint:
 303         return compat_chr(compat_html_entities.name2codepoint[entity])
 304
 305     mobj = re.match(r'#(x?[0-9]+)', entity)
 306     if mobj is not None:
 307         numstr = mobj.group(1)
 308         if numstr.startswith('x'):
 309             base = 16
 310             numstr = '0%s' % numstr
 311         else:
 312             base = 10
 313         return compat_chr(int(numstr, base))
 314
 315     # Unknown entity in name, return its literal representation
 316     return ('&%s;' % entity)
 317
 318
 319 def unescapeHTML(s):
 320     if s is None:
 321         return None
 322     assert type(s) == compat_str
 323
 324     return re.sub(
 325         r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
 326
 327
 328 def encodeFilename(s, for_subprocess=False):
 329     """
 330     @param s The name of the file
 331     """
 332
 333     assert type(s) == compat_str
 334
 335     # Python 3 has a Unicode API
 336     if sys.version_info >= (3, 0):
 337         return s
 338
 339     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 340         # Pass '' directly to use Unicode APIs on Windows 2000 and up
 341         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 342         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 343         if not for_subprocess:
 344             return s
 345         else:
 346             # For subprocess calls, encode with locale encoding
 347             # Refer to http://stackoverflow.com/a/9951851/35070
 348             encoding = preferredencoding()
 349     else:
 350         encoding = sys.getfilesystemencoding()
 351     if encoding is None:
 352         encoding = 'utf-8'
 353     return s.encode(encoding, 'ignore')
 354
 355
 356 def encodeArgument(s):
 357     if not isinstance(s, compat_str):
 358         # Legacy code that uses byte strings
 359         # Uncomment the following line after fixing all post processors
 360         #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 361         s = s.decode('ascii')
 362     return encodeFilename(s, True)
 363
 364
 365 def decodeOption(optval):
 366     if optval is None:
 367         return optval
 368     if isinstance(optval, bytes):
 369         optval = optval.decode(preferredencoding())
 370
 371     assert isinstance(optval, compat_str)
 372     return optval
 373
 374 def formatSeconds(secs):
 375     if secs > 3600:
 376         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 377     elif secs > 60:
 378         return '%d:%02d' % (secs // 60, secs % 60)
 379     else:
 380         return '%d' % secs
 381
 382
 383 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
 384     if sys.version_info < (3, 2):
 385         import httplib
 386
 387         class HTTPSConnectionV3(httplib.HTTPSConnection):
 388             def __init__(self, *args, **kwargs):
 389                 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
 390
 391             def connect(self):
 392                 sock = socket.create_connection((self.host, self.port), self.timeout)
 393                 if getattr(self, '_tunnel_host', False):
 394                     self.sock = sock
 395                     self._tunnel()
 396                 try:
 397                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
 398                 except ssl.SSLError:
 399                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 400
 401         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
 402             def https_open(self, req):
 403                 return self.do_open(HTTPSConnectionV3, req)
 404         return HTTPSHandlerV3(**kwargs)
 405     elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
 406         context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
 407         context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
 408         if opts_no_check_certificate:
 409             context.verify_mode = ssl.CERT_NONE
 410         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 411     else:  # Python < 3.4
 412         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 413         context.verify_mode = (ssl.CERT_NONE
 414                                if opts_no_check_certificate
 415                                else ssl.CERT_REQUIRED)
 416         context.set_default_verify_paths()
 417         try:
 418             context.load_default_certs()
 419         except AttributeError:
 420             pass  # Python < 3.4
 421         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 422
 423 class ExtractorError(Exception):
 424     """Error during info extraction."""
 425     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 426         """ tb, if given, is the original traceback (so that it can be printed out).
 427         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 428         """
 429
 430         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 431             expected = True
 432         if video_id is not None:
 433             msg = video_id + ': ' + msg
 434         if cause:
 435             msg += ' (caused by %r)' % cause
 436         if not expected:
 437             msg = msg + '; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 438         super(ExtractorError, self).__init__(msg)
 439
 440         self.traceback = tb
 441         self.exc_info = sys.exc_info()  # preserve original exception
 442         self.cause = cause
 443         self.video_id = video_id
 444
 445     def format_traceback(self):
 446         if self.traceback is None:
 447             return None
 448         return ''.join(traceback.format_tb(self.traceback))
 449
 450
 451 class RegexNotFoundError(ExtractorError):
 452     """Error when a regex didn't match"""
 453     pass
 454
 455
 456 class DownloadError(Exception):
 457     """Download Error exception.
 458
 459     This exception may be thrown by FileDownloader objects if they are not
 460     configured to continue on errors. They will contain the appropriate
 461     error message.
 462     """
 463     def __init__(self, msg, exc_info=None):
 464         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 465         super(DownloadError, self).__init__(msg)
 466         self.exc_info = exc_info
 467
 468
 469 class SameFileError(Exception):
 470     """Same File exception.
 471
 472     This exception will be thrown by FileDownloader objects if they detect
 473     multiple files would have to be downloaded to the same file on disk.
 474     """
 475     pass
 476
 477
 478 class PostProcessingError(Exception):
 479     """Post Processing exception.
 480
 481     This exception may be raised by PostProcessor's .run() method to
 482     indicate an error in the postprocessing task.
 483     """
 484     def __init__(self, msg):
 485         self.msg = msg
 486
 487 class MaxDownloadsReached(Exception):
 488     """ --max-downloads limit has been reached. """
 489     pass
 490
 491
 492 class UnavailableVideoError(Exception):
 493     """Unavailable Format exception.
 494
 495     This exception will be thrown when a video is requested
 496     in a format that is not available for that video.
 497     """
 498     pass
 499
 500
 501 class ContentTooShortError(Exception):
 502     """Content Too Short exception.
 503
 504     This exception may be raised by FileDownloader objects when a file they
 505     download is too small for what the server announced first, indicating
 506     the connection was probably interrupted.
 507     """
 508     # Both in bytes
 509     downloaded = None
 510     expected = None
 511
 512     def __init__(self, downloaded, expected):
 513         self.downloaded = downloaded
 514         self.expected = expected
 515
 516 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 517     """Handler for HTTP requests and responses.
 518
 519     This class, when installed with an OpenerDirector, automatically adds
 520     the standard headers to every HTTP request and handles gzipped and
 521     deflated responses from web servers. If compression is to be avoided in
 522     a particular request, the original request in the program code only has
 523     to include the HTTP header "Youtubedl-No-Compression", which will be
 524     removed before making the real request.
 525
 526     Part of this code was copied from:
 527
 528     http://techknack.net/python-urllib2-handlers/
 529
 530     Andrew Rowls, the author of that code, agreed to release it to the
 531     public domain.
 532     """
 533
 534     @staticmethod
 535     def deflate(data):
 536         try:
 537             return zlib.decompress(data, -zlib.MAX_WBITS)
 538         except zlib.error:
 539             return zlib.decompress(data)
 540
 541     @staticmethod
 542     def addinfourl_wrapper(stream, headers, url, code):
 543         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 544             return compat_urllib_request.addinfourl(stream, headers, url, code)
 545         ret = compat_urllib_request.addinfourl(stream, headers, url)
 546         ret.code = code
 547         return ret
 548
 549     def http_request(self, req):
 550         for h, v in std_headers.items():
 551             if h not in req.headers:
 552                 req.add_header(h, v)
 553         if 'Youtubedl-no-compression' in req.headers:
 554             if 'Accept-encoding' in req.headers:
 555                 del req.headers['Accept-encoding']
 556             del req.headers['Youtubedl-no-compression']
 557         if 'Youtubedl-user-agent' in req.headers:
 558             if 'User-agent' in req.headers:
 559                 del req.headers['User-agent']
 560             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 561             del req.headers['Youtubedl-user-agent']
 562
 563         if sys.version_info < (2, 7) and '#' in req.get_full_url():
 564             # Python 2.6 is brain-dead when it comes to fragments
 565             req._Request__original = req._Request__original.partition('#')[0]
 566             req._Request__r_type = req._Request__r_type.partition('#')[0]
 567
 568         return req
 569
 570     def http_response(self, req, resp):
 571         old_resp = resp
 572         # gzip
 573         if resp.headers.get('Content-encoding', '') == 'gzip':
 574             content = resp.read()
 575             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 576             try:
 577                 uncompressed = io.BytesIO(gz.read())
 578             except IOError as original_ioerror:
 579                 # There may be junk add the end of the file
 580                 # See http://stackoverflow.com/q/4928560/35070 for details
 581                 for i in range(1, 1024):
 582                     try:
 583                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 584                         uncompressed = io.BytesIO(gz.read())
 585                     except IOError:
 586                         continue
 587                     break
 588                 else:
 589                     raise original_ioerror
 590             resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 591             resp.msg = old_resp.msg
 592         # deflate
 593         if resp.headers.get('Content-encoding', '') == 'deflate':
 594             gz = io.BytesIO(self.deflate(resp.read()))
 595             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 596             resp.msg = old_resp.msg
 597         return resp
 598
 599     https_request = http_request
 600     https_response = http_response
 601
 602
 603 def parse_iso8601(date_str, delimiter='T'):
 604     """ Return a UNIX timestamp from the given date """
 605
 606     if date_str is None:
 607         return None
 608
 609     m = re.search(
 610         r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
 611         date_str)
 612     if not m:
 613         timezone = datetime.timedelta()
 614     else:
 615         date_str = date_str[:-len(m.group(0))]
 616         if not m.group('sign'):
 617             timezone = datetime.timedelta()
 618         else:
 619             sign = 1 if m.group('sign') == '+' else -1
 620             timezone = datetime.timedelta(
 621                 hours=sign * int(m.group('hours')),
 622                 minutes=sign * int(m.group('minutes')))
 623     date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
 624     dt = datetime.datetime.strptime(date_str, date_format) - timezone
 625     return calendar.timegm(dt.timetuple())
 626
 627
 628 def unified_strdate(date_str):
 629     """Return a string with the date in the format YYYYMMDD"""
 630
 631     if date_str is None:
 632         return None
 633
 634     upload_date = None
 635     #Replace commas
 636     date_str = date_str.replace(',', ' ')
 637     # %z (UTC offset) is only supported in python>=3.2
 638     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
 639     format_expressions = [
 640         '%d %B %Y',
 641         '%d %b %Y',
 642         '%B %d %Y',
 643         '%b %d %Y',
 644         '%b %dst %Y %I:%M%p',
 645         '%b %dnd %Y %I:%M%p',
 646         '%b %dth %Y %I:%M%p',
 647         '%Y-%m-%d',
 648         '%Y/%m/%d',
 649         '%d.%m.%Y',
 650         '%d/%m/%Y',
 651         '%d/%m/%y',
 652         '%Y/%m/%d %H:%M:%S',
 653         '%d/%m/%Y %H:%M:%S',
 654         '%Y-%m-%d %H:%M:%S',
 655         '%Y-%m-%d %H:%M:%S.%f',
 656         '%d.%m.%Y %H:%M',
 657         '%d.%m.%Y %H.%M',
 658         '%Y-%m-%dT%H:%M:%SZ',
 659         '%Y-%m-%dT%H:%M:%S.%fZ',
 660         '%Y-%m-%dT%H:%M:%S.%f0Z',
 661         '%Y-%m-%dT%H:%M:%S',
 662         '%Y-%m-%dT%H:%M:%S.%f',
 663         '%Y-%m-%dT%H:%M',
 664     ]
 665     for expression in format_expressions:
 666         try:
 667             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 668         except ValueError:
 669             pass
 670     if upload_date is None:
 671         timetuple = email.utils.parsedate_tz(date_str)
 672         if timetuple:
 673             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
 674     return upload_date
 675
 676 def determine_ext(url, default_ext='unknown_video'):
 677     if url is None:
 678         return default_ext
 679     guess = url.partition('?')[0].rpartition('.')[2]
 680     if re.match(r'^[A-Za-z0-9]+$', guess):
 681         return guess
 682     else:
 683         return default_ext
 684
 685 def subtitles_filename(filename, sub_lang, sub_format):
 686     return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
 687
 688 def date_from_str(date_str):
 689     """
 690     Return a datetime object from a string in the format YYYYMMDD or
 691     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 692     today = datetime.date.today()
 693     if date_str == 'now'or date_str == 'today':
 694         return today
 695     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 696     if match is not None:
 697         sign = match.group('sign')
 698         time = int(match.group('time'))
 699         if sign == '-':
 700             time = -time
 701         unit = match.group('unit')
 702         #A bad aproximation?
 703         if unit == 'month':
 704             unit = 'day'
 705             time *= 30
 706         elif unit == 'year':
 707             unit = 'day'
 708             time *= 365
 709         unit += 's'
 710         delta = datetime.timedelta(**{unit: time})
 711         return today + delta
 712     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 713
 714 def hyphenate_date(date_str):
 715     """
 716     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
 717     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
 718     if match is not None:
 719         return '-'.join(match.groups())
 720     else:
 721         return date_str
 722
 723 class DateRange(object):
 724     """Represents a time interval between two dates"""
 725     def __init__(self, start=None, end=None):
 726         """start and end must be strings in the format accepted by date"""
 727         if start is not None:
 728             self.start = date_from_str(start)
 729         else:
 730             self.start = datetime.datetime.min.date()
 731         if end is not None:
 732             self.end = date_from_str(end)
 733         else:
 734             self.end = datetime.datetime.max.date()
 735         if self.start > self.end:
 736             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 737     @classmethod
 738     def day(cls, day):
 739         """Returns a range that only contains the given day"""
 740         return cls(day,day)
 741     def __contains__(self, date):
 742         """Check if the date is in the range"""
 743         if not isinstance(date, datetime.date):
 744             date = date_from_str(date)
 745         return self.start <= date <= self.end
 746     def __str__(self):
 747         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 748
 749
 750 def platform_name():
 751     """ Returns the platform name as a compat_str """
 752     res = platform.platform()
 753     if isinstance(res, bytes):
 754         res = res.decode(preferredencoding())
 755
 756     assert isinstance(res, compat_str)
 757     return res
 758
 759
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The special method is the WriteConsoleW function of kernel32, which is
    only used if out has file descriptor 1 or 2 and the corresponding
    standard handle passes the console checks below.  Raises OSError if
    WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps the file descriptors 1 and 2 to the identifiers handed to
    # GetStdHandle (presumably STD_OUTPUT_HANDLE and STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Prototypes of the kernel32 functions used below
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the amount that each WriteConsoleW call has written
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A console handle is a valid character-type file (the remote flag
        # is ignored) for which GetConsoleMode succeeds
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write the string piece by piece: at most 1024 characters per call,
    # and a non-BMP character always in a call of its own
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        # A count of 0 means that s starts with a non-BMP character, for
        # which a length of 2 is passed
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever has actually been written
            s = s[written.value:]
    return True
 830
 831
 832 def write_string(s, out=None, encoding=None):
 833     if out is None:
 834         out = sys.stderr
 835     assert type(s) == compat_str
 836
 837     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
 838         if _windows_write_string(s, out):
 839             return
 840
 841     if ('b' in getattr(out, 'mode', '') or
 842             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 843         byt = s.encode(encoding or preferredencoding(), 'ignore')
 844         out.write(byt)
 845     elif hasattr(out, 'buffer'):
 846         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
 847         byt = s.encode(enc, 'ignore')
 848         out.buffer.write(byt)
 849     else:
 850         out.write(s)
 851     out.flush()
 852
 853
 854 def bytes_to_intlist(bs):
 855     if not bs:
 856         return []
 857     if isinstance(bs[0], int):  # Python 3
 858         return list(bs)
 859     else:
 860         return [ord(c) for c in bs]
 861
 862
 863 def intlist_to_bytes(xs):
 864     if not xs:
 865         return b''
 866     return struct_pack('%dB' % len(xs), *xs)
 867
 868
# Cross-platform file locking
#
# Both branches define the same two private helpers:
#   _lock_file(f, exclusive) -- lock the open file object f
#   _unlock_file(f)          -- release a lock taken by _lock_file
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        """ctypes layout of the structure passed to LockFileEx/UnlockFileEx."""
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    # Declare the prototypes explicitly so ctypes converts the arguments
    # and the BOOL result correctly.
    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToUnlockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToUnlockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high DWORDs of the byte count passed to both calls; together
    # with the zero offset set below they describe the locked range.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the file object f through LockFileEx.

        The dwFlags argument is 0x2 when exclusive is true and 0x0
        otherwise. Raises OSError if LockFileEx reports failure.
        """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the pointer on the file object: _unlock_file passes the very
        # same OVERLAPPED structure to UnlockFileEx.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file on the file object f.

        Requires that _lock_file stored its OVERLAPPED pointer on f.
        Raises OSError if UnlockFileEx reports failure.
        """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Take an flock() lock on f: LOCK_EX if exclusive, else LOCK_SH."""
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the flock() lock held on f."""
        fcntl.flock(f, fcntl.LOCK_UN)
 932
 933
 934 class locked_file(object):
 935     def __init__(self, filename, mode, encoding=None):
 936         assert mode in ['r', 'a', 'w']
 937         self.f = io.open(filename, mode, encoding=encoding)
 938         self.mode = mode
 939
 940     def __enter__(self):
 941         exclusive = self.mode != 'r'
 942         try:
 943             _lock_file(self.f, exclusive)
 944         except IOError:
 945             self.f.close()
 946             raise
 947         return self
 948
 949     def __exit__(self, etype, value, traceback):
 950         try:
 951             _unlock_file(self.f)
 952         finally:
 953             self.f.close()
 954
 955     def __iter__(self):
 956         return iter(self.f)
 957
 958     def write(self, *args):
 959         return self.f.write(*args)
 960
 961     def read(self, *args):
 962         return self.f.read(*args)
 963
 964
 965 def get_filesystem_encoding():
 966     encoding = sys.getfilesystemencoding()
 967     return encoding if encoding is not None else 'utf-8'
 968
 969
 970 def shell_quote(args):
 971     quoted_args = []
 972     encoding = get_filesystem_encoding()
 973     for a in args:
 974         if isinstance(a, bytes):
 975             # We may get a filename encoded with 'encodeFilename'
 976             a = a.decode(encoding)
 977         quoted_args.append(pipes.quote(a))
 978     return ' '.join(quoted_args)
 979
 980
 981 def takewhile_inclusive(pred, seq):
 982     """ Like itertools.takewhile, but include the latest evaluated element
 983         (the first element so that Not pred(e)) """
 984     for e in seq:
 985         yield e
 986         if not pred(e):
 987             return
 988
 989
 990 def smuggle_url(url, data):
 991     """ Pass additional data in a URL for internal use. """
 992
 993     sdata = compat_urllib_parse.urlencode(
 994         {'__youtubedl_smuggle': json.dumps(data)})
 995     return url + '#' + sdata
 996
 997
 998 def unsmuggle_url(smug_url, default=None):
 999     if not '#__youtubedl_smuggle' in smug_url:
1000         return smug_url, default
1001     url, _, sdata = smug_url.rpartition('#')
1002     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
1003     data = json.loads(jsond)
1004     return url, data
1005
1006
def format_bytes(bytes):
    """Format a byte count as a human-readable string with a binary suffix.

    bytes may be a number, a str holding a number, or None. Returns 'N/A'
    for None, otherwise the value scaled to the largest fitting unit with
    two decimals, e.g. '1.50KiB'. Values below one byte are reported in
    'B' and values beyond the largest known unit in 'YiB'.

    Raises ValueError for negative values (math.log domain error) or for a
    str that cannot be converted to float.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the available suffixes: tiny fractions would otherwise
        # produce a negative index (wrapping around to 'YiB') and huge
        # values would run off the end of the list.
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
1019
1020
def get_term_width():
    """Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable is consulted first; if it is unset,
    empty or not a valid integer, the output of ``stty size`` is used
    instead. Any failure while querying stty yields None.
    """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            # Malformed COLUMNS value: fall back to asking the terminal
            pass

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best effort only (stty missing, no tty, unexpected output, ...);
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return None
1035
1036
def month_by_name(name):
    """Return the month number (1-12) for a full English month name.

    The lookup is locale-independent and case-sensitive; None is returned
    for anything that is not one of the twelve names.
    """
    english_names = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December')
    if name in english_names:
        return english_names.index(name) + 1
    return None
1047
1048
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in xml_str by '&amp;'.

    Ampersands that already start one of the predefined entities (amp, lt,
    gt, apos, quot) or a numeric character reference are left untouched.
    """
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)
1055
1056
def setproctitle(title):
    """Set the process name to title via prctl(), where available.

    title must be a text string. The call is silently skipped when
    libc.so.6 cannot be loaded or when it does not provide prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes object makes ctypes allocate
    # one extra item, so the string handed to prctl is NUL-terminated.
    # (A buffer of exactly len(title_bytes) bytes has no terminator.)
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
1070
1071
def remove_start(s, start):
    """Return s without the prefix start; s is returned as is if absent."""
    return s[len(start):] if s.startswith(start) else s
1076
1077
def remove_end(s, end):
    """Return s without the suffix end; s is returned as is if absent.

    An empty end leaves s untouched.
    """
    # Guard against an empty suffix: s.endswith('') is always true and
    # s[:-0] would evaluate to the empty string instead of s.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1082
1083
def url_basename(url):
    """Return the last segment of the path component of url.

    Leading and trailing slashes of the path are ignored.
    """
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
1087
1088
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose get_method() always reports HEAD."""

    def get_method(self):
        return 'HEAD'
1092
1093
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, or return default when there is no value.

    If get_attr is given and v is not None, the attribute of that name is
    read from v first (a missing attribute counts as None). None and the
    empty string yield default; anything else is converted with int(),
    multiplied by invscale and floor-divided by scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1101
1102
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
1105
1106
def str_to_int(int_str):
    """Parse an integer from a string, ignoring ',', '.' and '+' characters.

    A more relaxed version of int_or_none: None is passed through, any
    other input has the separator characters stripped before int().
    """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
1113
1114
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float, or return default when v is None.

    The converted value is multiplied by invscale and divided by scale.
    """
    if v is None:
        return default
    return float(v) * invscale / scale
1117
1118
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Accepts forms such as '90', '1:23:45', '3 min 5 sec' or 'T1h2m3s',
    optionally with a fractional seconds part. Returns None for None or
    for a string that does not match; otherwise an int, or a float when a
    fractional part is present.
    """
    if s is None:
        return None

    m = re.match(
        r'''(?ix)T?
            (?:
                (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''',
        s.strip())
    if m is None:
        return None

    hours, mins, secs, fraction = m.group('hours', 'mins', 'secs', 'ms')
    total = int(secs)
    if mins:
        total += int(mins) * 60
        # Hours are only taken into account together with minutes
        if hours:
            total += int(hours) * 60 * 60
    if fraction:
        total += float(fraction)
    return total
1142
1143
def prepend_extension(filename, ext):
    """Insert ext before the real extension of filename.

    For example ('video.mp4', 'temp') gives 'video.temp.mp4'.
    """
    parts = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(parts[0], ext, parts[1])
1147
1148
def check_executable(exe, args=[]):
    """Check whether the binary exe can be started and return its name.

    exe is run with args (a list of arguments producing a short output,
    like -version) and its output is discarded. Returns exe on success and
    False when starting the process raises OSError.
    """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
1157
1158
def get_exe_version(exe, args=['--version'],
                    version_re=r'version\s+([-0-9._a-zA-Z]+)',
                    unrecognized='present'):
    """Return the version of the specified executable.

    exe is run with args (the default list is never modified) and the
    first line of its combined stdout/stderr output is searched with
    version_re, whose first group is the version.

    Returns the matched version string, unrecognized if the executable ran
    but no version could be found, or False if the executable is not
    present (starting it raised OSError).
    """
    # NOTE: the hyphen comes first in the character class of the default
    # version_re so that it is a literal; written as '_-a' it forms a range
    # that excludes '-' and truncates versions such as '2.1-3'.
    try:
        out, err = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
    m = re.search(version_re, firstline)
    if m:
        return m.group(1)
    return unrecognized
1176
1177
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1182
1183
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages one by one until a short page is seen.

    pagefunc is called with a zero-based page number and must return an
    iterable with the entries of that page; pagesize is the number of
    entries a full page holds.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list.

        Pages are requested starting with the one containing start. With
        end=None, fetching continues until a page that is not full.
        """
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            # Global indices of the first entry of this page and of the
            # next one
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset inside this page at which the requested range starts
            # (0 unless start falls into this page)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset inside this page at which the requested range ends
            # (None, i.e. the whole rest of the page, unless end falls into
            # this page)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
1225
1226
class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known beforehand.

    pagefunc is called with a zero-based page number and must return an
    iterable with the entries of that page; pagecount is the total number
    of pages and pagesize the number of entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list."""
        size = self._pagesize
        first_page = start // size
        # Number of entries to drop from the front of the first page
        to_skip = start - first_page * size
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // size + 1
            remaining = end - start

        collected = []
        for pagenum in range(first_page, stop_page):
            entries = list(self._pagefunc(pagenum))
            if to_skip:
                entries = entries[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(entries) >= remaining:
                    # This page completes the requested range
                    collected.extend(entries[:remaining])
                    break
                remaining -= len(entries)
            collected.extend(entries)
        return collected
1254
1255
def uppercase_escape(s):
    """Expand every \\UXXXXXXXX escape sequence in s to its character.

    Only the uppercase, eight hex digit form is handled; everything else
    in s is left untouched.
    """
    decode_escape = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns a (text, consumed length) pair
        return decode_escape(match.group(0))[0]

    return re.compile(r'\\U[0-9a-fA-F]{8}').sub(expand, s)
1262
1263
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986.

    On Python 2 a unicode string is encoded to UTF-8 first. Reserved
    characters and '%' are left as they are.
    """
    must_encode = sys.version_info < (3, 0) and isinstance(s, unicode)
    text = s.encode('utf-8') if must_encode else s
    return compat_urllib_parse.quote(text, b"%/;:@&=+$,!~*'()?#[]")
1269
1270
def escape_url(url):
    """Escape URL as suggested by RFC 3986.

    The path, params, query and fragment components are each passed
    through escape_rfc3986; the other components are kept as parsed.
    """
    parts = compat_urllib_parse_urlparse(url)
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        path=path, params=params, query=query, fragment=fragment)
    return escaped.geturl()
1280
# struct_pack / struct_unpack: wrappers around struct.pack / struct.unpack
# that accept a text format specification on every supported Python.
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def _bytes_spec(spec):
        return spec.encode('ascii') if isinstance(spec, compat_str) else spec

    def struct_pack(spec, *args):
        return struct.pack(_bytes_spec(spec), *args)

    def struct_unpack(spec, *args):
        return struct.unpack(_bytes_spec(spec), *args)
else:
    struct_pack, struct_unpack = struct.pack, struct.unpack
1297
1298
def read_batch_urls(batch_fd):
    """Read the URLs listed in the batch file object batch_fd.

    batch_fd is iterated line by line and closed afterwards. Byte lines
    are decoded as UTF-8 (undecodable bytes are replaced), a leading byte
    order mark and surrounding whitespace are removed, and empty lines as
    well as lines starting with '#', ';' or ']' are skipped.

    Returns the list of remaining URLs in file order.
    """
    # A UTF-8 BOM shows up as U+FEFF once the data has been decoded as
    # UTF-8, and as the three characters below when the file was decoded
    # with a single-byte codec such as latin-1; strip either form.
    BOMS = ('\ufeff', '\xef\xbb\xbf')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1313
1314
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes.

    All arguments are forwarded unchanged to compat_urllib_parse.urlencode.
    """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1317
1318
# etree_iter(node) iterates over an element tree; Element.iter is used where
# it exists, with a findall-based fallback otherwise.
etree_iter = getattr(xml.etree.ElementTree.Element, 'iter', None)
if etree_iter is None:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1323
1324
def parse_xml(s):
    """Parse the XML text s and return its root element.

    Doctype declarations are ignored. On Python 2, element texts that are
    byte strings are decoded as UTF-8 so that all texts are text strings.
    """
    etree = xml.etree.ElementTree

    class DoctypeIgnoringBuilder(etree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = etree.XMLParser(target=DoctypeIgnoringBuilder())
    # The parser keyword is only passed on Python 2.7 and newer
    extra = {}
    if sys.version_info >= (2, 7):
        extra['parser'] = parser
    tree = etree.XML(s.encode('utf-8'), **extra)

    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            text = node.text
            if text is not None and not isinstance(text, compat_str):
                node.text = text.decode('utf-8')
    return tree
1340
1341
# Age limit associated with each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """Return the age limit described by s as an int, or None.

    s is either a one or two digit age, optionally followed by '+' (for
    example '18+'), or one of the labels in US_RATINGS. None and anything
    unrecognized yield None.
    """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s)
1356
1357
def strip_jsonp(code):
    """Strip a JSONP callback wrapper and return the wrapped payload.

    'callback(<payload>);' optionally followed by '//' comments becomes
    '<payload>'; text that does not have this shape is returned unchanged.
    """
    jsonp_wrapper = re.compile(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$')
    return jsonp_wrapper.sub(r'\1', code)
1361
1362
def js_to_json(code):
    """Convert a JavaScript object literal to JSON text.

    Bare identifiers and single-quoted strings become double-quoted
    strings (true, false and null are kept as they are), and a trailing
    comma right before a closing ']' is removed.
    """
    # Replacements applied inside a single-quoted string when it is turned
    # into a double-quoted one
    requote = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"", lambda esc: requote[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    converted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*\])', lambda m: m.group(1), converted)
1386
1387
def qualities(quality_ids):
    """Return a function ranking quality ids by their position in quality_ids.

    The returned function maps an id to its index in quality_ids, or to -1
    when the id is not listed.
    """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1396
1397
# Default output template. The %(title)s, %(id)s and %(ext)s placeholders use
# %-style named substitution (presumably filled in from a video's info
# dictionary to build the output filename -- confirm against callers).
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1399
1400
def limit_length(s, length):
    """Shorten s to at most length characters, ending it with '...'.

    Strings that already fit are returned unchanged and None is passed
    through.
    """
    if s is None:
        return None
    if len(s) <= length:
        return s
    marker = '...'
    return s[:length - len(marker)] + marker
1409
1410
def version_tuple(v):
    """Split the dotted version string v into a list of integers.

    Raises ValueError if a component is not an integer.
    """
    return list(map(int, v.split('.')))
1413
1414
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    Both are dotted version strings. When version is empty or either
    string cannot be parsed, the answer is derived from assume_new: the
    version counts as outdated only if assume_new is false.
    """
    when_unknown = not assume_new
    if not version:
        return when_unknown
    try:
        current = version_tuple(version)
        minimum = version_tuple(limit)
    except ValueError:
        return when_unknown
    return current < minimum