youtube_dlc/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import io
  20 import itertools
  21 import json
  22 import locale
  23 import math
  24 import operator
  25 import os
  26 import platform
  27 import random
  28 import re
  29 import socket
  30 import ssl
  31 import subprocess
  32 import sys
  33 import tempfile
  34 import time
  35 import traceback
  36 import xml.etree.ElementTree
  37 import zlib
  38
  39 from .compat import (
  40     compat_HTMLParseError,
  41     compat_HTMLParser,
  42     compat_basestring,
  43     compat_chr,
  44     compat_cookiejar,
  45     compat_ctypes_WINFUNCTYPE,
  46     compat_etree_fromstring,
  47     compat_expanduser,
  48     compat_html_entities,
  49     compat_html_entities_html5,
  50     compat_http_client,
  51     compat_integer_types,
  52     compat_kwargs,
  53     compat_os_name,
  54     compat_parse_qs,
  55     compat_shlex_quote,
  56     compat_str,
  57     compat_struct_pack,
  58     compat_struct_unpack,
  59     compat_urllib_error,
  60     compat_urllib_parse,
  61     compat_urllib_parse_urlencode,
  62     compat_urllib_parse_urlparse,
  63     compat_urllib_parse_urlunparse,
  64     compat_urllib_parse_quote,
  65     compat_urllib_parse_quote_plus,
  66     compat_urllib_parse_unquote_plus,
  67     compat_urllib_request,
  68     compat_urlparse,
  69     compat_xpath,
  70 )
  71
  72 from .socks import (
  73     ProxyType,
  74     sockssocket,
  75 )
  76
  77
  78 def register_socks_protocols():
  79     # "Register" SOCKS protocols
  80     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  81     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  82     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  83         if scheme not in compat_urlparse.uses_netloc:
  84             compat_urlparse.uses_netloc.append(scheme)
  85
  86
  87 # This is not clearly defined otherwise
  88 compiled_regex_type = type(re.compile(''))
  89
  90
  91 def random_user_agent():
  92     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  93     _CHROME_VERSIONS = (
  94         '74.0.3729.129',
  95         '76.0.3780.3',
  96         '76.0.3780.2',
  97         '74.0.3729.128',
  98         '76.0.3780.1',
  99         '76.0.3780.0',
 100         '75.0.3770.15',
 101         '74.0.3729.127',
 102         '74.0.3729.126',
 103         '76.0.3779.1',
 104         '76.0.3779.0',
 105         '75.0.3770.14',
 106         '74.0.3729.125',
 107         '76.0.3778.1',
 108         '76.0.3778.0',
 109         '75.0.3770.13',
 110         '74.0.3729.124',
 111         '74.0.3729.123',
 112         '73.0.3683.121',
 113         '76.0.3777.1',
 114         '76.0.3777.0',
 115         '75.0.3770.12',
 116         '74.0.3729.122',
 117         '76.0.3776.4',
 118         '75.0.3770.11',
 119         '74.0.3729.121',
 120         '76.0.3776.3',
 121         '76.0.3776.2',
 122         '73.0.3683.120',
 123         '74.0.3729.120',
 124         '74.0.3729.119',
 125         '74.0.3729.118',
 126         '76.0.3776.1',
 127         '76.0.3776.0',
 128         '76.0.3775.5',
 129         '75.0.3770.10',
 130         '74.0.3729.117',
 131         '76.0.3775.4',
 132         '76.0.3775.3',
 133         '74.0.3729.116',
 134         '75.0.3770.9',
 135         '76.0.3775.2',
 136         '76.0.3775.1',
 137         '76.0.3775.0',
 138         '75.0.3770.8',
 139         '74.0.3729.115',
 140         '74.0.3729.114',
 141         '76.0.3774.1',
 142         '76.0.3774.0',
 143         '75.0.3770.7',
 144         '74.0.3729.113',
 145         '74.0.3729.112',
 146         '74.0.3729.111',
 147         '76.0.3773.1',
 148         '76.0.3773.0',
 149         '75.0.3770.6',
 150         '74.0.3729.110',
 151         '74.0.3729.109',
 152         '76.0.3772.1',
 153         '76.0.3772.0',
 154         '75.0.3770.5',
 155         '74.0.3729.108',
 156         '74.0.3729.107',
 157         '76.0.3771.1',
 158         '76.0.3771.0',
 159         '75.0.3770.4',
 160         '74.0.3729.106',
 161         '74.0.3729.105',
 162         '75.0.3770.3',
 163         '74.0.3729.104',
 164         '74.0.3729.103',
 165         '74.0.3729.102',
 166         '75.0.3770.2',
 167         '74.0.3729.101',
 168         '75.0.3770.1',
 169         '75.0.3770.0',
 170         '74.0.3729.100',
 171         '75.0.3769.5',
 172         '75.0.3769.4',
 173         '74.0.3729.99',
 174         '75.0.3769.3',
 175         '75.0.3769.2',
 176         '75.0.3768.6',
 177         '74.0.3729.98',
 178         '75.0.3769.1',
 179         '75.0.3769.0',
 180         '74.0.3729.97',
 181         '73.0.3683.119',
 182         '73.0.3683.118',
 183         '74.0.3729.96',
 184         '75.0.3768.5',
 185         '75.0.3768.4',
 186         '75.0.3768.3',
 187         '75.0.3768.2',
 188         '74.0.3729.95',
 189         '74.0.3729.94',
 190         '75.0.3768.1',
 191         '75.0.3768.0',
 192         '74.0.3729.93',
 193         '74.0.3729.92',
 194         '73.0.3683.117',
 195         '74.0.3729.91',
 196         '75.0.3766.3',
 197         '74.0.3729.90',
 198         '75.0.3767.2',
 199         '75.0.3767.1',
 200         '75.0.3767.0',
 201         '74.0.3729.89',
 202         '73.0.3683.116',
 203         '75.0.3766.2',
 204         '74.0.3729.88',
 205         '75.0.3766.1',
 206         '75.0.3766.0',
 207         '74.0.3729.87',
 208         '73.0.3683.115',
 209         '74.0.3729.86',
 210         '75.0.3765.1',
 211         '75.0.3765.0',
 212         '74.0.3729.85',
 213         '73.0.3683.114',
 214         '74.0.3729.84',
 215         '75.0.3764.1',
 216         '75.0.3764.0',
 217         '74.0.3729.83',
 218         '73.0.3683.113',
 219         '75.0.3763.2',
 220         '75.0.3761.4',
 221         '74.0.3729.82',
 222         '75.0.3763.1',
 223         '75.0.3763.0',
 224         '74.0.3729.81',
 225         '73.0.3683.112',
 226         '75.0.3762.1',
 227         '75.0.3762.0',
 228         '74.0.3729.80',
 229         '75.0.3761.3',
 230         '74.0.3729.79',
 231         '73.0.3683.111',
 232         '75.0.3761.2',
 233         '74.0.3729.78',
 234         '74.0.3729.77',
 235         '75.0.3761.1',
 236         '75.0.3761.0',
 237         '73.0.3683.110',
 238         '74.0.3729.76',
 239         '74.0.3729.75',
 240         '75.0.3760.0',
 241         '74.0.3729.74',
 242         '75.0.3759.8',
 243         '75.0.3759.7',
 244         '75.0.3759.6',
 245         '74.0.3729.73',
 246         '75.0.3759.5',
 247         '74.0.3729.72',
 248         '73.0.3683.109',
 249         '75.0.3759.4',
 250         '75.0.3759.3',
 251         '74.0.3729.71',
 252         '75.0.3759.2',
 253         '74.0.3729.70',
 254         '73.0.3683.108',
 255         '74.0.3729.69',
 256         '75.0.3759.1',
 257         '75.0.3759.0',
 258         '74.0.3729.68',
 259         '73.0.3683.107',
 260         '74.0.3729.67',
 261         '75.0.3758.1',
 262         '75.0.3758.0',
 263         '74.0.3729.66',
 264         '73.0.3683.106',
 265         '74.0.3729.65',
 266         '75.0.3757.1',
 267         '75.0.3757.0',
 268         '74.0.3729.64',
 269         '73.0.3683.105',
 270         '74.0.3729.63',
 271         '75.0.3756.1',
 272         '75.0.3756.0',
 273         '74.0.3729.62',
 274         '73.0.3683.104',
 275         '75.0.3755.3',
 276         '75.0.3755.2',
 277         '73.0.3683.103',
 278         '75.0.3755.1',
 279         '75.0.3755.0',
 280         '74.0.3729.61',
 281         '73.0.3683.102',
 282         '74.0.3729.60',
 283         '75.0.3754.2',
 284         '74.0.3729.59',
 285         '75.0.3753.4',
 286         '74.0.3729.58',
 287         '75.0.3754.1',
 288         '75.0.3754.0',
 289         '74.0.3729.57',
 290         '73.0.3683.101',
 291         '75.0.3753.3',
 292         '75.0.3752.2',
 293         '75.0.3753.2',
 294         '74.0.3729.56',
 295         '75.0.3753.1',
 296         '75.0.3753.0',
 297         '74.0.3729.55',
 298         '73.0.3683.100',
 299         '74.0.3729.54',
 300         '75.0.3752.1',
 301         '75.0.3752.0',
 302         '74.0.3729.53',
 303         '73.0.3683.99',
 304         '74.0.3729.52',
 305         '75.0.3751.1',
 306         '75.0.3751.0',
 307         '74.0.3729.51',
 308         '73.0.3683.98',
 309         '74.0.3729.50',
 310         '75.0.3750.0',
 311         '74.0.3729.49',
 312         '74.0.3729.48',
 313         '74.0.3729.47',
 314         '75.0.3749.3',
 315         '74.0.3729.46',
 316         '73.0.3683.97',
 317         '75.0.3749.2',
 318         '74.0.3729.45',
 319         '75.0.3749.1',
 320         '75.0.3749.0',
 321         '74.0.3729.44',
 322         '73.0.3683.96',
 323         '74.0.3729.43',
 324         '74.0.3729.42',
 325         '75.0.3748.1',
 326         '75.0.3748.0',
 327         '74.0.3729.41',
 328         '75.0.3747.1',
 329         '73.0.3683.95',
 330         '75.0.3746.4',
 331         '74.0.3729.40',
 332         '74.0.3729.39',
 333         '75.0.3747.0',
 334         '75.0.3746.3',
 335         '75.0.3746.2',
 336         '74.0.3729.38',
 337         '75.0.3746.1',
 338         '75.0.3746.0',
 339         '74.0.3729.37',
 340         '73.0.3683.94',
 341         '75.0.3745.5',
 342         '75.0.3745.4',
 343         '75.0.3745.3',
 344         '75.0.3745.2',
 345         '74.0.3729.36',
 346         '75.0.3745.1',
 347         '75.0.3745.0',
 348         '75.0.3744.2',
 349         '74.0.3729.35',
 350         '73.0.3683.93',
 351         '74.0.3729.34',
 352         '75.0.3744.1',
 353         '75.0.3744.0',
 354         '74.0.3729.33',
 355         '73.0.3683.92',
 356         '74.0.3729.32',
 357         '74.0.3729.31',
 358         '73.0.3683.91',
 359         '75.0.3741.2',
 360         '75.0.3740.5',
 361         '74.0.3729.30',
 362         '75.0.3741.1',
 363         '75.0.3741.0',
 364         '74.0.3729.29',
 365         '75.0.3740.4',
 366         '73.0.3683.90',
 367         '74.0.3729.28',
 368         '75.0.3740.3',
 369         '73.0.3683.89',
 370         '75.0.3740.2',
 371         '74.0.3729.27',
 372         '75.0.3740.1',
 373         '75.0.3740.0',
 374         '74.0.3729.26',
 375         '73.0.3683.88',
 376         '73.0.3683.87',
 377         '74.0.3729.25',
 378         '75.0.3739.1',
 379         '75.0.3739.0',
 380         '73.0.3683.86',
 381         '74.0.3729.24',
 382         '73.0.3683.85',
 383         '75.0.3738.4',
 384         '75.0.3738.3',
 385         '75.0.3738.2',
 386         '75.0.3738.1',
 387         '75.0.3738.0',
 388         '74.0.3729.23',
 389         '73.0.3683.84',
 390         '74.0.3729.22',
 391         '74.0.3729.21',
 392         '75.0.3737.1',
 393         '75.0.3737.0',
 394         '74.0.3729.20',
 395         '73.0.3683.83',
 396         '74.0.3729.19',
 397         '75.0.3736.1',
 398         '75.0.3736.0',
 399         '74.0.3729.18',
 400         '73.0.3683.82',
 401         '74.0.3729.17',
 402         '75.0.3735.1',
 403         '75.0.3735.0',
 404         '74.0.3729.16',
 405         '73.0.3683.81',
 406         '75.0.3734.1',
 407         '75.0.3734.0',
 408         '74.0.3729.15',
 409         '73.0.3683.80',
 410         '74.0.3729.14',
 411         '75.0.3733.1',
 412         '75.0.3733.0',
 413         '75.0.3732.1',
 414         '74.0.3729.13',
 415         '74.0.3729.12',
 416         '73.0.3683.79',
 417         '74.0.3729.11',
 418         '75.0.3732.0',
 419         '74.0.3729.10',
 420         '73.0.3683.78',
 421         '74.0.3729.9',
 422         '74.0.3729.8',
 423         '74.0.3729.7',
 424         '75.0.3731.3',
 425         '75.0.3731.2',
 426         '75.0.3731.0',
 427         '74.0.3729.6',
 428         '73.0.3683.77',
 429         '73.0.3683.76',
 430         '75.0.3730.5',
 431         '75.0.3730.4',
 432         '73.0.3683.75',
 433         '74.0.3729.5',
 434         '73.0.3683.74',
 435         '75.0.3730.3',
 436         '75.0.3730.2',
 437         '74.0.3729.4',
 438         '73.0.3683.73',
 439         '73.0.3683.72',
 440         '75.0.3730.1',
 441         '75.0.3730.0',
 442         '74.0.3729.3',
 443         '73.0.3683.71',
 444         '74.0.3729.2',
 445         '73.0.3683.70',
 446         '74.0.3729.1',
 447         '74.0.3729.0',
 448         '74.0.3726.4',
 449         '73.0.3683.69',
 450         '74.0.3726.3',
 451         '74.0.3728.0',
 452         '74.0.3726.2',
 453         '73.0.3683.68',
 454         '74.0.3726.1',
 455         '74.0.3726.0',
 456         '74.0.3725.4',
 457         '73.0.3683.67',
 458         '73.0.3683.66',
 459         '74.0.3725.3',
 460         '74.0.3725.2',
 461         '74.0.3725.1',
 462         '74.0.3724.8',
 463         '74.0.3725.0',
 464         '73.0.3683.65',
 465         '74.0.3724.7',
 466         '74.0.3724.6',
 467         '74.0.3724.5',
 468         '74.0.3724.4',
 469         '74.0.3724.3',
 470         '74.0.3724.2',
 471         '74.0.3724.1',
 472         '74.0.3724.0',
 473         '73.0.3683.64',
 474         '74.0.3723.1',
 475         '74.0.3723.0',
 476         '73.0.3683.63',
 477         '74.0.3722.1',
 478         '74.0.3722.0',
 479         '73.0.3683.62',
 480         '74.0.3718.9',
 481         '74.0.3702.3',
 482         '74.0.3721.3',
 483         '74.0.3721.2',
 484         '74.0.3721.1',
 485         '74.0.3721.0',
 486         '74.0.3720.6',
 487         '73.0.3683.61',
 488         '72.0.3626.122',
 489         '73.0.3683.60',
 490         '74.0.3720.5',
 491         '72.0.3626.121',
 492         '74.0.3718.8',
 493         '74.0.3720.4',
 494         '74.0.3720.3',
 495         '74.0.3718.7',
 496         '74.0.3720.2',
 497         '74.0.3720.1',
 498         '74.0.3720.0',
 499         '74.0.3718.6',
 500         '74.0.3719.5',
 501         '73.0.3683.59',
 502         '74.0.3718.5',
 503         '74.0.3718.4',
 504         '74.0.3719.4',
 505         '74.0.3719.3',
 506         '74.0.3719.2',
 507         '74.0.3719.1',
 508         '73.0.3683.58',
 509         '74.0.3719.0',
 510         '73.0.3683.57',
 511         '73.0.3683.56',
 512         '74.0.3718.3',
 513         '73.0.3683.55',
 514         '74.0.3718.2',
 515         '74.0.3718.1',
 516         '74.0.3718.0',
 517         '73.0.3683.54',
 518         '74.0.3717.2',
 519         '73.0.3683.53',
 520         '74.0.3717.1',
 521         '74.0.3717.0',
 522         '73.0.3683.52',
 523         '74.0.3716.1',
 524         '74.0.3716.0',
 525         '73.0.3683.51',
 526         '74.0.3715.1',
 527         '74.0.3715.0',
 528         '73.0.3683.50',
 529         '74.0.3711.2',
 530         '74.0.3714.2',
 531         '74.0.3713.3',
 532         '74.0.3714.1',
 533         '74.0.3714.0',
 534         '73.0.3683.49',
 535         '74.0.3713.1',
 536         '74.0.3713.0',
 537         '72.0.3626.120',
 538         '73.0.3683.48',
 539         '74.0.3712.2',
 540         '74.0.3712.1',
 541         '74.0.3712.0',
 542         '73.0.3683.47',
 543         '72.0.3626.119',
 544         '73.0.3683.46',
 545         '74.0.3710.2',
 546         '72.0.3626.118',
 547         '74.0.3711.1',
 548         '74.0.3711.0',
 549         '73.0.3683.45',
 550         '72.0.3626.117',
 551         '74.0.3710.1',
 552         '74.0.3710.0',
 553         '73.0.3683.44',
 554         '72.0.3626.116',
 555         '74.0.3709.1',
 556         '74.0.3709.0',
 557         '74.0.3704.9',
 558         '73.0.3683.43',
 559         '72.0.3626.115',
 560         '74.0.3704.8',
 561         '74.0.3704.7',
 562         '74.0.3708.0',
 563         '74.0.3706.7',
 564         '74.0.3704.6',
 565         '73.0.3683.42',
 566         '72.0.3626.114',
 567         '74.0.3706.6',
 568         '72.0.3626.113',
 569         '74.0.3704.5',
 570         '74.0.3706.5',
 571         '74.0.3706.4',
 572         '74.0.3706.3',
 573         '74.0.3706.2',
 574         '74.0.3706.1',
 575         '74.0.3706.0',
 576         '73.0.3683.41',
 577         '72.0.3626.112',
 578         '74.0.3705.1',
 579         '74.0.3705.0',
 580         '73.0.3683.40',
 581         '72.0.3626.111',
 582         '73.0.3683.39',
 583         '74.0.3704.4',
 584         '73.0.3683.38',
 585         '74.0.3704.3',
 586         '74.0.3704.2',
 587         '74.0.3704.1',
 588         '74.0.3704.0',
 589         '73.0.3683.37',
 590         '72.0.3626.110',
 591         '72.0.3626.109',
 592         '74.0.3703.3',
 593         '74.0.3703.2',
 594         '73.0.3683.36',
 595         '74.0.3703.1',
 596         '74.0.3703.0',
 597         '73.0.3683.35',
 598         '72.0.3626.108',
 599         '74.0.3702.2',
 600         '74.0.3699.3',
 601         '74.0.3702.1',
 602         '74.0.3702.0',
 603         '73.0.3683.34',
 604         '72.0.3626.107',
 605         '73.0.3683.33',
 606         '74.0.3701.1',
 607         '74.0.3701.0',
 608         '73.0.3683.32',
 609         '73.0.3683.31',
 610         '72.0.3626.105',
 611         '74.0.3700.1',
 612         '74.0.3700.0',
 613         '73.0.3683.29',
 614         '72.0.3626.103',
 615         '74.0.3699.2',
 616         '74.0.3699.1',
 617         '74.0.3699.0',
 618         '73.0.3683.28',
 619         '72.0.3626.102',
 620         '73.0.3683.27',
 621         '73.0.3683.26',
 622         '74.0.3698.0',
 623         '74.0.3696.2',
 624         '72.0.3626.101',
 625         '73.0.3683.25',
 626         '74.0.3696.1',
 627         '74.0.3696.0',
 628         '74.0.3694.8',
 629         '72.0.3626.100',
 630         '74.0.3694.7',
 631         '74.0.3694.6',
 632         '74.0.3694.5',
 633         '74.0.3694.4',
 634         '72.0.3626.99',
 635         '72.0.3626.98',
 636         '74.0.3694.3',
 637         '73.0.3683.24',
 638         '72.0.3626.97',
 639         '72.0.3626.96',
 640         '72.0.3626.95',
 641         '73.0.3683.23',
 642         '72.0.3626.94',
 643         '73.0.3683.22',
 644         '73.0.3683.21',
 645         '72.0.3626.93',
 646         '74.0.3694.2',
 647         '72.0.3626.92',
 648         '74.0.3694.1',
 649         '74.0.3694.0',
 650         '74.0.3693.6',
 651         '73.0.3683.20',
 652         '72.0.3626.91',
 653         '74.0.3693.5',
 654         '74.0.3693.4',
 655         '74.0.3693.3',
 656         '74.0.3693.2',
 657         '73.0.3683.19',
 658         '74.0.3693.1',
 659         '74.0.3693.0',
 660         '73.0.3683.18',
 661         '72.0.3626.90',
 662         '74.0.3692.1',
 663         '74.0.3692.0',
 664         '73.0.3683.17',
 665         '72.0.3626.89',
 666         '74.0.3687.3',
 667         '74.0.3691.1',
 668         '74.0.3691.0',
 669         '73.0.3683.16',
 670         '72.0.3626.88',
 671         '72.0.3626.87',
 672         '73.0.3683.15',
 673         '74.0.3690.1',
 674         '74.0.3690.0',
 675         '73.0.3683.14',
 676         '72.0.3626.86',
 677         '73.0.3683.13',
 678         '73.0.3683.12',
 679         '74.0.3689.1',
 680         '74.0.3689.0',
 681         '73.0.3683.11',
 682         '72.0.3626.85',
 683         '73.0.3683.10',
 684         '72.0.3626.84',
 685         '73.0.3683.9',
 686         '74.0.3688.1',
 687         '74.0.3688.0',
 688         '73.0.3683.8',
 689         '72.0.3626.83',
 690         '74.0.3687.2',
 691         '74.0.3687.1',
 692         '74.0.3687.0',
 693         '73.0.3683.7',
 694         '72.0.3626.82',
 695         '74.0.3686.4',
 696         '72.0.3626.81',
 697         '74.0.3686.3',
 698         '74.0.3686.2',
 699         '74.0.3686.1',
 700         '74.0.3686.0',
 701         '73.0.3683.6',
 702         '72.0.3626.80',
 703         '74.0.3685.1',
 704         '74.0.3685.0',
 705         '73.0.3683.5',
 706         '72.0.3626.79',
 707         '74.0.3684.1',
 708         '74.0.3684.0',
 709         '73.0.3683.4',
 710         '72.0.3626.78',
 711         '72.0.3626.77',
 712         '73.0.3683.3',
 713         '73.0.3683.2',
 714         '72.0.3626.76',
 715         '73.0.3683.1',
 716         '73.0.3683.0',
 717         '72.0.3626.75',
 718         '71.0.3578.141',
 719         '73.0.3682.1',
 720         '73.0.3682.0',
 721         '72.0.3626.74',
 722         '71.0.3578.140',
 723         '73.0.3681.4',
 724         '73.0.3681.3',
 725         '73.0.3681.2',
 726         '73.0.3681.1',
 727         '73.0.3681.0',
 728         '72.0.3626.73',
 729         '71.0.3578.139',
 730         '72.0.3626.72',
 731         '72.0.3626.71',
 732         '73.0.3680.1',
 733         '73.0.3680.0',
 734         '72.0.3626.70',
 735         '71.0.3578.138',
 736         '73.0.3678.2',
 737         '73.0.3679.1',
 738         '73.0.3679.0',
 739         '72.0.3626.69',
 740         '71.0.3578.137',
 741         '73.0.3678.1',
 742         '73.0.3678.0',
 743         '71.0.3578.136',
 744         '73.0.3677.1',
 745         '73.0.3677.0',
 746         '72.0.3626.68',
 747         '72.0.3626.67',
 748         '71.0.3578.135',
 749         '73.0.3676.1',
 750         '73.0.3676.0',
 751         '73.0.3674.2',
 752         '72.0.3626.66',
 753         '71.0.3578.134',
 754         '73.0.3674.1',
 755         '73.0.3674.0',
 756         '72.0.3626.65',
 757         '71.0.3578.133',
 758         '73.0.3673.2',
 759         '73.0.3673.1',
 760         '73.0.3673.0',
 761         '72.0.3626.64',
 762         '71.0.3578.132',
 763         '72.0.3626.63',
 764         '72.0.3626.62',
 765         '72.0.3626.61',
 766         '72.0.3626.60',
 767         '73.0.3672.1',
 768         '73.0.3672.0',
 769         '72.0.3626.59',
 770         '71.0.3578.131',
 771         '73.0.3671.3',
 772         '73.0.3671.2',
 773         '73.0.3671.1',
 774         '73.0.3671.0',
 775         '72.0.3626.58',
 776         '71.0.3578.130',
 777         '73.0.3670.1',
 778         '73.0.3670.0',
 779         '72.0.3626.57',
 780         '71.0.3578.129',
 781         '73.0.3669.1',
 782         '73.0.3669.0',
 783         '72.0.3626.56',
 784         '71.0.3578.128',
 785         '73.0.3668.2',
 786         '73.0.3668.1',
 787         '73.0.3668.0',
 788         '72.0.3626.55',
 789         '71.0.3578.127',
 790         '73.0.3667.2',
 791         '73.0.3667.1',
 792         '73.0.3667.0',
 793         '72.0.3626.54',
 794         '71.0.3578.126',
 795         '73.0.3666.1',
 796         '73.0.3666.0',
 797         '72.0.3626.53',
 798         '71.0.3578.125',
 799         '73.0.3665.4',
 800         '73.0.3665.3',
 801         '72.0.3626.52',
 802         '73.0.3665.2',
 803         '73.0.3664.4',
 804         '73.0.3665.1',
 805         '73.0.3665.0',
 806         '72.0.3626.51',
 807         '71.0.3578.124',
 808         '72.0.3626.50',
 809         '73.0.3664.3',
 810         '73.0.3664.2',
 811         '73.0.3664.1',
 812         '73.0.3664.0',
 813         '73.0.3663.2',
 814         '72.0.3626.49',
 815         '71.0.3578.123',
 816         '73.0.3663.1',
 817         '73.0.3663.0',
 818         '72.0.3626.48',
 819         '71.0.3578.122',
 820         '73.0.3662.1',
 821         '73.0.3662.0',
 822         '72.0.3626.47',
 823         '71.0.3578.121',
 824         '73.0.3661.1',
 825         '72.0.3626.46',
 826         '73.0.3661.0',
 827         '72.0.3626.45',
 828         '71.0.3578.120',
 829         '73.0.3660.2',
 830         '73.0.3660.1',
 831         '73.0.3660.0',
 832         '72.0.3626.44',
 833         '71.0.3578.119',
 834         '73.0.3659.1',
 835         '73.0.3659.0',
 836         '72.0.3626.43',
 837         '71.0.3578.118',
 838         '73.0.3658.1',
 839         '73.0.3658.0',
 840         '72.0.3626.42',
 841         '71.0.3578.117',
 842         '73.0.3657.1',
 843         '73.0.3657.0',
 844         '72.0.3626.41',
 845         '71.0.3578.116',
 846         '73.0.3656.1',
 847         '73.0.3656.0',
 848         '72.0.3626.40',
 849         '71.0.3578.115',
 850         '73.0.3655.1',
 851         '73.0.3655.0',
 852         '72.0.3626.39',
 853         '71.0.3578.114',
 854         '73.0.3654.1',
 855         '73.0.3654.0',
 856         '72.0.3626.38',
 857         '71.0.3578.113',
 858         '73.0.3653.1',
 859         '73.0.3653.0',
 860         '72.0.3626.37',
 861         '71.0.3578.112',
 862         '73.0.3652.1',
 863         '73.0.3652.0',
 864         '72.0.3626.36',
 865         '71.0.3578.111',
 866         '73.0.3651.1',
 867         '73.0.3651.0',
 868         '72.0.3626.35',
 869         '71.0.3578.110',
 870         '73.0.3650.1',
 871         '73.0.3650.0',
 872         '72.0.3626.34',
 873         '71.0.3578.109',
 874         '73.0.3649.1',
 875         '73.0.3649.0',
 876         '72.0.3626.33',
 877         '71.0.3578.108',
 878         '73.0.3648.2',
 879         '73.0.3648.1',
 880         '73.0.3648.0',
 881         '72.0.3626.32',
 882         '71.0.3578.107',
 883         '73.0.3647.2',
 884         '73.0.3647.1',
 885         '73.0.3647.0',
 886         '72.0.3626.31',
 887         '71.0.3578.106',
 888         '73.0.3635.3',
 889         '73.0.3646.2',
 890         '73.0.3646.1',
 891         '73.0.3646.0',
 892         '72.0.3626.30',
 893         '71.0.3578.105',
 894         '72.0.3626.29',
 895         '73.0.3645.2',
 896         '73.0.3645.1',
 897         '73.0.3645.0',
 898         '72.0.3626.28',
 899         '71.0.3578.104',
 900         '72.0.3626.27',
 901         '72.0.3626.26',
 902         '72.0.3626.25',
 903         '72.0.3626.24',
 904         '73.0.3644.0',
 905         '73.0.3643.2',
 906         '72.0.3626.23',
 907         '71.0.3578.103',
 908         '73.0.3643.1',
 909         '73.0.3643.0',
 910         '72.0.3626.22',
 911         '71.0.3578.102',
 912         '73.0.3642.1',
 913         '73.0.3642.0',
 914         '72.0.3626.21',
 915         '71.0.3578.101',
 916         '73.0.3641.1',
 917         '73.0.3641.0',
 918         '72.0.3626.20',
 919         '71.0.3578.100',
 920         '72.0.3626.19',
 921         '73.0.3640.1',
 922         '73.0.3640.0',
 923         '72.0.3626.18',
 924         '73.0.3639.1',
 925         '71.0.3578.99',
 926         '73.0.3639.0',
 927         '72.0.3626.17',
 928         '73.0.3638.2',
 929         '72.0.3626.16',
 930         '73.0.3638.1',
 931         '73.0.3638.0',
 932         '72.0.3626.15',
 933         '71.0.3578.98',
 934         '73.0.3635.2',
 935         '71.0.3578.97',
 936         '73.0.3637.1',
 937         '73.0.3637.0',
 938         '72.0.3626.14',
 939         '71.0.3578.96',
 940         '71.0.3578.95',
 941         '72.0.3626.13',
 942         '71.0.3578.94',
 943         '73.0.3636.2',
 944         '71.0.3578.93',
 945         '73.0.3636.1',
 946         '73.0.3636.0',
 947         '72.0.3626.12',
 948         '71.0.3578.92',
 949         '73.0.3635.1',
 950         '73.0.3635.0',
 951         '72.0.3626.11',
 952         '71.0.3578.91',
 953         '73.0.3634.2',
 954         '73.0.3634.1',
 955         '73.0.3634.0',
 956         '72.0.3626.10',
 957         '71.0.3578.90',
 958         '71.0.3578.89',
 959         '73.0.3633.2',
 960         '73.0.3633.1',
 961         '73.0.3633.0',
 962         '72.0.3610.4',
 963         '72.0.3626.9',
 964         '71.0.3578.88',
 965         '73.0.3632.5',
 966         '73.0.3632.4',
 967         '73.0.3632.3',
 968         '73.0.3632.2',
 969         '73.0.3632.1',
 970         '73.0.3632.0',
 971         '72.0.3626.8',
 972         '71.0.3578.87',
 973         '73.0.3631.2',
 974         '73.0.3631.1',
 975         '73.0.3631.0',
 976         '72.0.3626.7',
 977         '71.0.3578.86',
 978         '72.0.3626.6',
 979         '73.0.3630.1',
 980         '73.0.3630.0',
 981         '72.0.3626.5',
 982         '71.0.3578.85',
 983         '72.0.3626.4',
 984         '73.0.3628.3',
 985         '73.0.3628.2',
 986         '73.0.3629.1',
 987         '73.0.3629.0',
 988         '72.0.3626.3',
 989         '71.0.3578.84',
 990         '73.0.3628.1',
 991         '73.0.3628.0',
 992         '71.0.3578.83',
 993         '73.0.3627.1',
 994         '73.0.3627.0',
 995         '72.0.3626.2',
 996         '71.0.3578.82',
 997         '71.0.3578.81',
 998         '71.0.3578.80',
 999         '72.0.3626.1',
1000         '72.0.3626.0',
1001         '71.0.3578.79',
1002         '70.0.3538.124',
1003         '71.0.3578.78',
1004         '72.0.3623.4',
1005         '72.0.3625.2',
1006         '72.0.3625.1',
1007         '72.0.3625.0',
1008         '71.0.3578.77',
1009         '70.0.3538.123',
1010         '72.0.3624.4',
1011         '72.0.3624.3',
1012         '72.0.3624.2',
1013         '71.0.3578.76',
1014         '72.0.3624.1',
1015         '72.0.3624.0',
1016         '72.0.3623.3',
1017         '71.0.3578.75',
1018         '70.0.3538.122',
1019         '71.0.3578.74',
1020         '72.0.3623.2',
1021         '72.0.3610.3',
1022         '72.0.3623.1',
1023         '72.0.3623.0',
1024         '72.0.3622.3',
1025         '72.0.3622.2',
1026         '71.0.3578.73',
1027         '70.0.3538.121',
1028         '72.0.3622.1',
1029         '72.0.3622.0',
1030         '71.0.3578.72',
1031         '70.0.3538.120',
1032         '72.0.3621.1',
1033         '72.0.3621.0',
1034         '71.0.3578.71',
1035         '70.0.3538.119',
1036         '72.0.3620.1',
1037         '72.0.3620.0',
1038         '71.0.3578.70',
1039         '70.0.3538.118',
1040         '71.0.3578.69',
1041         '72.0.3619.1',
1042         '72.0.3619.0',
1043         '71.0.3578.68',
1044         '70.0.3538.117',
1045         '71.0.3578.67',
1046         '72.0.3618.1',
1047         '72.0.3618.0',
1048         '71.0.3578.66',
1049         '70.0.3538.116',
1050         '72.0.3617.1',
1051         '72.0.3617.0',
1052         '71.0.3578.65',
1053         '70.0.3538.115',
1054         '72.0.3602.3',
1055         '71.0.3578.64',
1056         '72.0.3616.1',
1057         '72.0.3616.0',
1058         '71.0.3578.63',
1059         '70.0.3538.114',
1060         '71.0.3578.62',
1061         '72.0.3615.1',
1062         '72.0.3615.0',
1063         '71.0.3578.61',
1064         '70.0.3538.113',
1065         '72.0.3614.1',
1066         '72.0.3614.0',
1067         '71.0.3578.60',
1068         '70.0.3538.112',
1069         '72.0.3613.1',
1070         '72.0.3613.0',
1071         '71.0.3578.59',
1072         '70.0.3538.111',
1073         '72.0.3612.2',
1074         '72.0.3612.1',
1075         '72.0.3612.0',
1076         '70.0.3538.110',
1077         '71.0.3578.58',
1078         '70.0.3538.109',
1079         '72.0.3611.2',
1080         '72.0.3611.1',
1081         '72.0.3611.0',
1082         '71.0.3578.57',
1083         '70.0.3538.108',
1084         '72.0.3610.2',
1085         '71.0.3578.56',
1086         '71.0.3578.55',
1087         '72.0.3610.1',
1088         '72.0.3610.0',
1089         '71.0.3578.54',
1090         '70.0.3538.107',
1091         '71.0.3578.53',
1092         '72.0.3609.3',
1093         '71.0.3578.52',
1094         '72.0.3609.2',
1095         '71.0.3578.51',
1096         '72.0.3608.5',
1097         '72.0.3609.1',
1098         '72.0.3609.0',
1099         '71.0.3578.50',
1100         '70.0.3538.106',
1101         '72.0.3608.4',
1102         '72.0.3608.3',
1103         '72.0.3608.2',
1104         '71.0.3578.49',
1105         '72.0.3608.1',
1106         '72.0.3608.0',
1107         '70.0.3538.105',
1108         '71.0.3578.48',
1109         '72.0.3607.1',
1110         '72.0.3607.0',
1111         '71.0.3578.47',
1112         '70.0.3538.104',
1113         '72.0.3606.2',
1114         '72.0.3606.1',
1115         '72.0.3606.0',
1116         '71.0.3578.46',
1117         '70.0.3538.103',
1118         '70.0.3538.102',
1119         '72.0.3605.3',
1120         '72.0.3605.2',
1121         '72.0.3605.1',
1122         '72.0.3605.0',
1123         '71.0.3578.45',
1124         '70.0.3538.101',
1125         '71.0.3578.44',
1126         '71.0.3578.43',
1127         '70.0.3538.100',
1128         '70.0.3538.99',
1129         '71.0.3578.42',
1130         '72.0.3604.1',
1131         '72.0.3604.0',
1132         '71.0.3578.41',
1133         '70.0.3538.98',
1134         '71.0.3578.40',
1135         '72.0.3603.2',
1136         '72.0.3603.1',
1137         '72.0.3603.0',
1138         '71.0.3578.39',
1139         '70.0.3538.97',
1140         '72.0.3602.2',
1141         '71.0.3578.38',
1142         '71.0.3578.37',
1143         '72.0.3602.1',
1144         '72.0.3602.0',
1145         '71.0.3578.36',
1146         '70.0.3538.96',
1147         '72.0.3601.1',
1148         '72.0.3601.0',
1149         '71.0.3578.35',
1150         '70.0.3538.95',
1151         '72.0.3600.1',
1152         '72.0.3600.0',
1153         '71.0.3578.34',
1154         '70.0.3538.94',
1155         '72.0.3599.3',
1156         '72.0.3599.2',
1157         '72.0.3599.1',
1158         '72.0.3599.0',
1159         '71.0.3578.33',
1160         '70.0.3538.93',
1161         '72.0.3598.1',
1162         '72.0.3598.0',
1163         '71.0.3578.32',
1164         '70.0.3538.87',
1165         '72.0.3597.1',
1166         '72.0.3597.0',
1167         '72.0.3596.2',
1168         '71.0.3578.31',
1169         '70.0.3538.86',
1170         '71.0.3578.30',
1171         '71.0.3578.29',
1172         '72.0.3596.1',
1173         '72.0.3596.0',
1174         '71.0.3578.28',
1175         '70.0.3538.85',
1176         '72.0.3595.2',
1177         '72.0.3591.3',
1178         '72.0.3595.1',
1179         '72.0.3595.0',
1180         '71.0.3578.27',
1181         '70.0.3538.84',
1182         '72.0.3594.1',
1183         '72.0.3594.0',
1184         '71.0.3578.26',
1185         '70.0.3538.83',
1186         '72.0.3593.2',
1187         '72.0.3593.1',
1188         '72.0.3593.0',
1189         '71.0.3578.25',
1190         '70.0.3538.82',
1191         '72.0.3589.3',
1192         '72.0.3592.2',
1193         '72.0.3592.1',
1194         '72.0.3592.0',
1195         '71.0.3578.24',
1196         '72.0.3589.2',
1197         '70.0.3538.81',
1198         '70.0.3538.80',
1199         '72.0.3591.2',
1200         '72.0.3591.1',
1201         '72.0.3591.0',
1202         '71.0.3578.23',
1203         '70.0.3538.79',
1204         '71.0.3578.22',
1205         '72.0.3590.1',
1206         '72.0.3590.0',
1207         '71.0.3578.21',
1208         '70.0.3538.78',
1209         '70.0.3538.77',
1210         '72.0.3589.1',
1211         '72.0.3589.0',
1212         '71.0.3578.20',
1213         '70.0.3538.76',
1214         '71.0.3578.19',
1215         '70.0.3538.75',
1216         '72.0.3588.1',
1217         '72.0.3588.0',
1218         '71.0.3578.18',
1219         '70.0.3538.74',
1220         '72.0.3586.2',
1221         '72.0.3587.0',
1222         '71.0.3578.17',
1223         '70.0.3538.73',
1224         '72.0.3586.1',
1225         '72.0.3586.0',
1226         '71.0.3578.16',
1227         '70.0.3538.72',
1228         '72.0.3585.1',
1229         '72.0.3585.0',
1230         '71.0.3578.15',
1231         '70.0.3538.71',
1232         '71.0.3578.14',
1233         '72.0.3584.1',
1234         '72.0.3584.0',
1235         '71.0.3578.13',
1236         '70.0.3538.70',
1237         '72.0.3583.2',
1238         '71.0.3578.12',
1239         '72.0.3583.1',
1240         '72.0.3583.0',
1241         '71.0.3578.11',
1242         '70.0.3538.69',
1243         '71.0.3578.10',
1244         '72.0.3582.0',
1245         '72.0.3581.4',
1246         '71.0.3578.9',
1247         '70.0.3538.67',
1248         '72.0.3581.3',
1249         '72.0.3581.2',
1250         '72.0.3581.1',
1251         '72.0.3581.0',
1252         '71.0.3578.8',
1253         '70.0.3538.66',
1254         '72.0.3580.1',
1255         '72.0.3580.0',
1256         '71.0.3578.7',
1257         '70.0.3538.65',
1258         '71.0.3578.6',
1259         '72.0.3579.1',
1260         '72.0.3579.0',
1261         '71.0.3578.5',
1262         '70.0.3538.64',
1263         '71.0.3578.4',
1264         '71.0.3578.3',
1265         '71.0.3578.2',
1266         '71.0.3578.1',
1267         '71.0.3578.0',
1268         '70.0.3538.63',
1269         '69.0.3497.128',
1270         '70.0.3538.62',
1271         '70.0.3538.61',
1272         '70.0.3538.60',
1273         '70.0.3538.59',
1274         '71.0.3577.1',
1275         '71.0.3577.0',
1276         '70.0.3538.58',
1277         '69.0.3497.127',
1278         '71.0.3576.2',
1279         '71.0.3576.1',
1280         '71.0.3576.0',
1281         '70.0.3538.57',
1282         '70.0.3538.56',
1283         '71.0.3575.2',
1284         '70.0.3538.55',
1285         '69.0.3497.126',
1286         '70.0.3538.54',
1287         '71.0.3575.1',
1288         '71.0.3575.0',
1289         '71.0.3574.1',
1290         '71.0.3574.0',
1291         '70.0.3538.53',
1292         '69.0.3497.125',
1293         '70.0.3538.52',
1294         '71.0.3573.1',
1295         '71.0.3573.0',
1296         '70.0.3538.51',
1297         '69.0.3497.124',
1298         '71.0.3572.1',
1299         '71.0.3572.0',
1300         '70.0.3538.50',
1301         '69.0.3497.123',
1302         '71.0.3571.2',
1303         '70.0.3538.49',
1304         '69.0.3497.122',
1305         '71.0.3571.1',
1306         '71.0.3571.0',
1307         '70.0.3538.48',
1308         '69.0.3497.121',
1309         '71.0.3570.1',
1310         '71.0.3570.0',
1311         '70.0.3538.47',
1312         '69.0.3497.120',
1313         '71.0.3568.2',
1314         '71.0.3569.1',
1315         '71.0.3569.0',
1316         '70.0.3538.46',
1317         '69.0.3497.119',
1318         '70.0.3538.45',
1319         '71.0.3568.1',
1320         '71.0.3568.0',
1321         '70.0.3538.44',
1322         '69.0.3497.118',
1323         '70.0.3538.43',
1324         '70.0.3538.42',
1325         '71.0.3567.1',
1326         '71.0.3567.0',
1327         '70.0.3538.41',
1328         '69.0.3497.117',
1329         '71.0.3566.1',
1330         '71.0.3566.0',
1331         '70.0.3538.40',
1332         '69.0.3497.116',
1333         '71.0.3565.1',
1334         '71.0.3565.0',
1335         '70.0.3538.39',
1336         '69.0.3497.115',
1337         '71.0.3564.1',
1338         '71.0.3564.0',
1339         '70.0.3538.38',
1340         '69.0.3497.114',
1341         '71.0.3563.0',
1342         '71.0.3562.2',
1343         '70.0.3538.37',
1344         '69.0.3497.113',
1345         '70.0.3538.36',
1346         '70.0.3538.35',
1347         '71.0.3562.1',
1348         '71.0.3562.0',
1349         '70.0.3538.34',
1350         '69.0.3497.112',
1351         '70.0.3538.33',
1352         '71.0.3561.1',
1353         '71.0.3561.0',
1354         '70.0.3538.32',
1355         '69.0.3497.111',
1356         '71.0.3559.6',
1357         '71.0.3560.1',
1358         '71.0.3560.0',
1359         '71.0.3559.5',
1360         '71.0.3559.4',
1361         '70.0.3538.31',
1362         '69.0.3497.110',
1363         '71.0.3559.3',
1364         '70.0.3538.30',
1365         '69.0.3497.109',
1366         '71.0.3559.2',
1367         '71.0.3559.1',
1368         '71.0.3559.0',
1369         '70.0.3538.29',
1370         '69.0.3497.108',
1371         '71.0.3558.2',
1372         '71.0.3558.1',
1373         '71.0.3558.0',
1374         '70.0.3538.28',
1375         '69.0.3497.107',
1376         '71.0.3557.2',
1377         '71.0.3557.1',
1378         '71.0.3557.0',
1379         '70.0.3538.27',
1380         '69.0.3497.106',
1381         '71.0.3554.4',
1382         '70.0.3538.26',
1383         '71.0.3556.1',
1384         '71.0.3556.0',
1385         '70.0.3538.25',
1386         '71.0.3554.3',
1387         '69.0.3497.105',
1388         '71.0.3554.2',
1389         '70.0.3538.24',
1390         '69.0.3497.104',
1391         '71.0.3555.2',
1392         '70.0.3538.23',
1393         '71.0.3555.1',
1394         '71.0.3555.0',
1395         '70.0.3538.22',
1396         '69.0.3497.103',
1397         '71.0.3554.1',
1398         '71.0.3554.0',
1399         '70.0.3538.21',
1400         '69.0.3497.102',
1401         '71.0.3553.3',
1402         '70.0.3538.20',
1403         '69.0.3497.101',
1404         '71.0.3553.2',
1405         '69.0.3497.100',
1406         '71.0.3553.1',
1407         '71.0.3553.0',
1408         '70.0.3538.19',
1409         '69.0.3497.99',
1410         '69.0.3497.98',
1411         '69.0.3497.97',
1412         '71.0.3552.6',
1413         '71.0.3552.5',
1414         '71.0.3552.4',
1415         '71.0.3552.3',
1416         '71.0.3552.2',
1417         '71.0.3552.1',
1418         '71.0.3552.0',
1419         '70.0.3538.18',
1420         '69.0.3497.96',
1421         '71.0.3551.3',
1422         '71.0.3551.2',
1423         '71.0.3551.1',
1424         '71.0.3551.0',
1425         '70.0.3538.17',
1426         '69.0.3497.95',
1427         '71.0.3550.3',
1428         '71.0.3550.2',
1429         '71.0.3550.1',
1430         '71.0.3550.0',
1431         '70.0.3538.16',
1432         '69.0.3497.94',
1433         '71.0.3549.1',
1434         '71.0.3549.0',
1435         '70.0.3538.15',
1436         '69.0.3497.93',
1437         '69.0.3497.92',
1438         '71.0.3548.1',
1439         '71.0.3548.0',
1440         '70.0.3538.14',
1441         '69.0.3497.91',
1442         '71.0.3547.1',
1443         '71.0.3547.0',
1444         '70.0.3538.13',
1445         '69.0.3497.90',
1446         '71.0.3546.2',
1447         '69.0.3497.89',
1448         '71.0.3546.1',
1449         '71.0.3546.0',
1450         '70.0.3538.12',
1451         '69.0.3497.88',
1452         '71.0.3545.4',
1453         '71.0.3545.3',
1454         '71.0.3545.2',
1455         '71.0.3545.1',
1456         '71.0.3545.0',
1457         '70.0.3538.11',
1458         '69.0.3497.87',
1459         '71.0.3544.5',
1460         '71.0.3544.4',
1461         '71.0.3544.3',
1462         '71.0.3544.2',
1463         '71.0.3544.1',
1464         '71.0.3544.0',
1465         '69.0.3497.86',
1466         '70.0.3538.10',
1467         '69.0.3497.85',
1468         '70.0.3538.9',
1469         '69.0.3497.84',
1470         '71.0.3543.4',
1471         '70.0.3538.8',
1472         '71.0.3543.3',
1473         '71.0.3543.2',
1474         '71.0.3543.1',
1475         '71.0.3543.0',
1476         '70.0.3538.7',
1477         '69.0.3497.83',
1478         '71.0.3542.2',
1479         '71.0.3542.1',
1480         '71.0.3542.0',
1481         '70.0.3538.6',
1482         '69.0.3497.82',
1483         '69.0.3497.81',
1484         '71.0.3541.1',
1485         '71.0.3541.0',
1486         '70.0.3538.5',
1487         '69.0.3497.80',
1488         '71.0.3540.1',
1489         '71.0.3540.0',
1490         '70.0.3538.4',
1491         '69.0.3497.79',
1492         '70.0.3538.3',
1493         '71.0.3539.1',
1494         '71.0.3539.0',
1495         '69.0.3497.78',
1496         '68.0.3440.134',
1497         '69.0.3497.77',
1498         '70.0.3538.2',
1499         '70.0.3538.1',
1500         '70.0.3538.0',
1501         '69.0.3497.76',
1502         '68.0.3440.133',
1503         '69.0.3497.75',
1504         '70.0.3537.2',
1505         '70.0.3537.1',
1506         '70.0.3537.0',
1507         '69.0.3497.74',
1508         '68.0.3440.132',
1509         '70.0.3536.0',
1510         '70.0.3535.5',
1511         '70.0.3535.4',
1512         '70.0.3535.3',
1513         '69.0.3497.73',
1514         '68.0.3440.131',
1515         '70.0.3532.8',
1516         '70.0.3532.7',
1517         '69.0.3497.72',
1518         '69.0.3497.71',
1519         '70.0.3535.2',
1520         '70.0.3535.1',
1521         '70.0.3535.0',
1522         '69.0.3497.70',
1523         '68.0.3440.130',
1524         '69.0.3497.69',
1525         '68.0.3440.129',
1526         '70.0.3534.4',
1527         '70.0.3534.3',
1528         '70.0.3534.2',
1529         '70.0.3534.1',
1530         '70.0.3534.0',
1531         '69.0.3497.68',
1532         '68.0.3440.128',
1533         '70.0.3533.2',
1534         '70.0.3533.1',
1535         '70.0.3533.0',
1536         '69.0.3497.67',
1537         '68.0.3440.127',
1538         '70.0.3532.6',
1539         '70.0.3532.5',
1540         '70.0.3532.4',
1541         '69.0.3497.66',
1542         '68.0.3440.126',
1543         '70.0.3532.3',
1544         '70.0.3532.2',
1545         '70.0.3532.1',
1546         '69.0.3497.60',
1547         '69.0.3497.65',
1548         '69.0.3497.64',
1549         '70.0.3532.0',
1550         '70.0.3531.0',
1551         '70.0.3530.4',
1552         '70.0.3530.3',
1553         '70.0.3530.2',
1554         '69.0.3497.58',
1555         '68.0.3440.125',
1556         '69.0.3497.57',
1557         '69.0.3497.56',
1558         '69.0.3497.55',
1559         '69.0.3497.54',
1560         '70.0.3530.1',
1561         '70.0.3530.0',
1562         '69.0.3497.53',
1563         '68.0.3440.124',
1564         '69.0.3497.52',
1565         '70.0.3529.3',
1566         '70.0.3529.2',
1567         '70.0.3529.1',
1568         '70.0.3529.0',
1569         '69.0.3497.51',
1570         '70.0.3528.4',
1571         '68.0.3440.123',
1572         '70.0.3528.3',
1573         '70.0.3528.2',
1574         '70.0.3528.1',
1575         '70.0.3528.0',
1576         '69.0.3497.50',
1577         '68.0.3440.122',
1578         '70.0.3527.1',
1579         '70.0.3527.0',
1580         '69.0.3497.49',
1581         '68.0.3440.121',
1582         '70.0.3526.1',
1583         '70.0.3526.0',
1584         '68.0.3440.120',
1585         '69.0.3497.48',
1586         '69.0.3497.47',
1587         '68.0.3440.119',
1588         '68.0.3440.118',
1589         '70.0.3525.5',
1590         '70.0.3525.4',
1591         '70.0.3525.3',
1592         '68.0.3440.117',
1593         '69.0.3497.46',
1594         '70.0.3525.2',
1595         '70.0.3525.1',
1596         '70.0.3525.0',
1597         '69.0.3497.45',
1598         '68.0.3440.116',
1599         '70.0.3524.4',
1600         '70.0.3524.3',
1601         '69.0.3497.44',
1602         '70.0.3524.2',
1603         '70.0.3524.1',
1604         '70.0.3524.0',
1605         '70.0.3523.2',
1606         '69.0.3497.43',
1607         '68.0.3440.115',
1608         '70.0.3505.9',
1609         '69.0.3497.42',
1610         '70.0.3505.8',
1611         '70.0.3523.1',
1612         '70.0.3523.0',
1613         '69.0.3497.41',
1614         '68.0.3440.114',
1615         '70.0.3505.7',
1616         '69.0.3497.40',
1617         '70.0.3522.1',
1618         '70.0.3522.0',
1619         '70.0.3521.2',
1620         '69.0.3497.39',
1621         '68.0.3440.113',
1622         '70.0.3505.6',
1623         '70.0.3521.1',
1624         '70.0.3521.0',
1625         '69.0.3497.38',
1626         '68.0.3440.112',
1627         '70.0.3520.1',
1628         '70.0.3520.0',
1629         '69.0.3497.37',
1630         '68.0.3440.111',
1631         '70.0.3519.3',
1632         '70.0.3519.2',
1633         '70.0.3519.1',
1634         '70.0.3519.0',
1635         '69.0.3497.36',
1636         '68.0.3440.110',
1637         '70.0.3518.1',
1638         '70.0.3518.0',
1639         '69.0.3497.35',
1640         '69.0.3497.34',
1641         '68.0.3440.109',
1642         '70.0.3517.1',
1643         '70.0.3517.0',
1644         '69.0.3497.33',
1645         '68.0.3440.108',
1646         '69.0.3497.32',
1647         '70.0.3516.3',
1648         '70.0.3516.2',
1649         '70.0.3516.1',
1650         '70.0.3516.0',
1651         '69.0.3497.31',
1652         '68.0.3440.107',
1653         '70.0.3515.4',
1654         '68.0.3440.106',
1655         '70.0.3515.3',
1656         '70.0.3515.2',
1657         '70.0.3515.1',
1658         '70.0.3515.0',
1659         '69.0.3497.30',
1660         '68.0.3440.105',
1661         '68.0.3440.104',
1662         '70.0.3514.2',
1663         '70.0.3514.1',
1664         '70.0.3514.0',
1665         '69.0.3497.29',
1666         '68.0.3440.103',
1667         '70.0.3513.1',
1668         '70.0.3513.0',
1669         '69.0.3497.28',
1670     )
1671     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1672
1673
1674 std_headers = {
1675     'User-Agent': random_user_agent(),
1676     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1677     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1678     'Accept-Encoding': 'gzip, deflate',
1679     'Accept-Language': 'en-us,en;q=0.5',
1680 }
1681
1682
1683 USER_AGENTS = {
1684     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1685 }
1686
1687
1688 NO_DEFAULT = object()
1689
1690 ENGLISH_MONTH_NAMES = [
1691     'January', 'February', 'March', 'April', 'May', 'June',
1692     'July', 'August', 'September', 'October', 'November', 'December']
1693
1694 MONTH_NAMES = {
1695     'en': ENGLISH_MONTH_NAMES,
1696     'fr': [
1697         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1698         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1699 }
1700
1701 KNOWN_EXTENSIONS = (
1702     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1703     'flv', 'f4v', 'f4a', 'f4b',
1704     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1705     'mkv', 'mka', 'mk3d',
1706     'avi', 'divx',
1707     'mov',
1708     'asf', 'wmv', 'wma',
1709     '3gp', '3g2',
1710     'mp3',
1711     'flac',
1712     'ape',
1713     'wav',
1714     'f4f', 'f4m', 'm3u8', 'smil')
1715
1716 # needed for sanitizing filenames in restricted mode
1717 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1718                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1719                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1720
1721 DATE_FORMATS = (
1722     '%d %B %Y',
1723     '%d %b %Y',
1724     '%B %d %Y',
1725     '%B %dst %Y',
1726     '%B %dnd %Y',
1727     '%B %drd %Y',
1728     '%B %dth %Y',
1729     '%b %d %Y',
1730     '%b %dst %Y',
1731     '%b %dnd %Y',
1732     '%b %drd %Y',
1733     '%b %dth %Y',
1734     '%b %dst %Y %I:%M',
1735     '%b %dnd %Y %I:%M',
1736     '%b %drd %Y %I:%M',
1737     '%b %dth %Y %I:%M',
1738     '%Y %m %d',
1739     '%Y-%m-%d',
1740     '%Y/%m/%d',
1741     '%Y/%m/%d %H:%M',
1742     '%Y/%m/%d %H:%M:%S',
1743     '%Y-%m-%d %H:%M',
1744     '%Y-%m-%d %H:%M:%S',
1745     '%Y-%m-%d %H:%M:%S.%f',
1746     '%d.%m.%Y %H:%M',
1747     '%d.%m.%Y %H.%M',
1748     '%Y-%m-%dT%H:%M:%SZ',
1749     '%Y-%m-%dT%H:%M:%S.%fZ',
1750     '%Y-%m-%dT%H:%M:%S.%f0Z',
1751     '%Y-%m-%dT%H:%M:%S',
1752     '%Y-%m-%dT%H:%M:%S.%f',
1753     '%Y-%m-%dT%H:%M',
1754     '%b %d %Y at %H:%M',
1755     '%b %d %Y at %H:%M:%S',
1756     '%B %d %Y at %H:%M',
1757     '%B %d %Y at %H:%M:%S',
1758 )
1759
1760 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1761 DATE_FORMATS_DAY_FIRST.extend([
1762     '%d-%m-%Y',
1763     '%d.%m.%Y',
1764     '%d.%m.%y',
1765     '%d/%m/%Y',
1766     '%d/%m/%y',
1767     '%d/%m/%Y %H:%M:%S',
1768 ])
1769
1770 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1771 DATE_FORMATS_MONTH_FIRST.extend([
1772     '%m-%d-%Y',
1773     '%m.%d.%Y',
1774     '%m/%d/%Y',
1775     '%m/%d/%y',
1776     '%m/%d/%Y %H:%M:%S',
1777 ])
1778
1779 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1780 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1781
1782
1783 def preferredencoding():
1784     """Get preferred encoding.
1785
1786     Returns the best encoding scheme for the system, based on
1787     locale.getpreferredencoding() and some further tweaks.
1788     """
1789     try:
1790         pref = locale.getpreferredencoding()
1791         'TEST'.encode(pref)
1792     except Exception:
1793         pref = 'UTF-8'
1794
1795     return pref
1796
1797
1798 def write_json_file(obj, fn):
1799     """ Encode obj as JSON and write it to fn, atomically if possible """
1800
1801     fn = encodeFilename(fn)
1802     if sys.version_info < (3, 0) and sys.platform != 'win32':
1803         encoding = get_filesystem_encoding()
1804         # os.path.basename returns a bytes object, but NamedTemporaryFile
1805         # will fail if the filename contains non ascii characters unless we
1806         # use a unicode object
1807         path_basename = lambda f: os.path.basename(fn).decode(encoding)
1808         # the same for os.path.dirname
1809         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1810     else:
1811         path_basename = os.path.basename
1812         path_dirname = os.path.dirname
1813
1814     args = {
1815         'suffix': '.tmp',
1816         'prefix': path_basename(fn) + '.',
1817         'dir': path_dirname(fn),
1818         'delete': False,
1819     }
1820
1821     # In Python 2.x, json.dump expects a bytestream.
1822     # In Python 3.x, it writes to a character stream
1823     if sys.version_info < (3, 0):
1824         args['mode'] = 'wb'
1825     else:
1826         args.update({
1827             'mode': 'w',
1828             'encoding': 'utf-8',
1829         })
1830
1831     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1832
1833     try:
1834         with tf:
1835             json.dump(obj, tf)
1836         if sys.platform == 'win32':
1837             # Need to remove existing file on Windows, else os.rename raises
1838             # WindowsError or FileExistsError.
1839             try:
1840                 os.unlink(fn)
1841             except OSError:
1842                 pass
1843         try:
1844             mask = os.umask(0)
1845             os.umask(mask)
1846             os.chmod(tf.name, 0o666 & ~mask)
1847         except OSError:
1848             pass
1849         os.rename(tf.name, fn)
1850     except Exception:
1851         try:
1852             os.remove(tf.name)
1853         except OSError:
1854             pass
1855         raise
1856
1857
1858 if sys.version_info >= (2, 7):
1859     def find_xpath_attr(node, xpath, key, val=None):
1860         """ Find the xpath xpath[@key=val] """
1861         assert re.match(r'^[a-zA-Z_-]+$', key)
1862         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1863         return node.find(expr)
1864 else:
1865     def find_xpath_attr(node, xpath, key, val=None):
1866         for f in node.findall(compat_xpath(xpath)):
1867             if key not in f.attrib:
1868                 continue
1869             if val is None or f.attrib.get(key) == val:
1870                 return f
1871         return None
1872
1873 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1874 # the namespace parameter
1875
1876
1877 def xpath_with_ns(path, ns_map):
1878     components = [c.split(':') for c in path.split('/')]
1879     replaced = []
1880     for c in components:
1881         if len(c) == 1:
1882             replaced.append(c[0])
1883         else:
1884             ns, tag = c
1885             replaced.append('{%s}%s' % (ns_map[ns], tag))
1886     return '/'.join(replaced)
1887
1888
1889 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1890     def _find_xpath(xpath):
1891         return node.find(compat_xpath(xpath))
1892
1893     if isinstance(xpath, (str, compat_str)):
1894         n = _find_xpath(xpath)
1895     else:
1896         for xp in xpath:
1897             n = _find_xpath(xp)
1898             if n is not None:
1899                 break
1900
1901     if n is None:
1902         if default is not NO_DEFAULT:
1903             return default
1904         elif fatal:
1905             name = xpath if name is None else name
1906             raise ExtractorError('Could not find XML element %s' % name)
1907         else:
1908             return None
1909     return n
1910
1911
1912 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1913     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1914     if n is None or n == default:
1915         return n
1916     if n.text is None:
1917         if default is not NO_DEFAULT:
1918             return default
1919         elif fatal:
1920             name = xpath if name is None else name
1921             raise ExtractorError('Could not find XML element\'s text %s' % name)
1922         else:
1923             return None
1924     return n.text
1925
1926
1927 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1928     n = find_xpath_attr(node, xpath, key)
1929     if n is None:
1930         if default is not NO_DEFAULT:
1931             return default
1932         elif fatal:
1933             name = '%s[@%s]' % (xpath, key) if name is None else name
1934             raise ExtractorError('Could not find XML attribute %s' % name)
1935         else:
1936             return None
1937     return n.attrib[key]
1938
1939
1940 def get_element_by_id(id, html):
1941     """Return the content of the tag with the specified ID in the passed HTML document"""
1942     return get_element_by_attribute('id', id, html)
1943
1944
1945 def get_element_by_class(class_name, html):
1946     """Return the content of the first tag with the specified class in the passed HTML document"""
1947     retval = get_elements_by_class(class_name, html)
1948     return retval[0] if retval else None
1949
1950
1951 def get_element_by_attribute(attribute, value, html, escape_value=True):
1952     retval = get_elements_by_attribute(attribute, value, html, escape_value)
1953     return retval[0] if retval else None
1954
1955
1956 def get_elements_by_class(class_name, html):
1957     """Return the content of all tags with the specified class in the passed HTML document as a list"""
1958     return get_elements_by_attribute(
1959         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1960         html, escape_value=False)
1961
1962
1963 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1964     """Return the content of the tag with the specified attribute in the passed HTML document"""
1965
1966     value = re.escape(value) if escape_value else value
1967
1968     retlist = []
1969     for m in re.finditer(r'''(?xs)
1970         <([a-zA-Z0-9:._-]+)
1971          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1972          \s+%s=['"]?%s['"]?
1973          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1974         \s*>
1975         (?P<content>.*?)
1976         </\1>
1977     ''' % (re.escape(attribute), value), html):
1978         res = m.group('content')
1979
1980         if res.startswith('"') or res.startswith("'"):
1981             res = res[1:-1]
1982
1983         retlist.append(unescapeHTML(res))
1984
1985     return retlist
1986
1987
1988 class HTMLAttributeParser(compat_HTMLParser):
1989     """Trivial HTML parser to gather the attributes for a single element"""
1990
1991     def __init__(self):
1992         self.attrs = {}
1993         compat_HTMLParser.__init__(self)
1994
1995     def handle_starttag(self, tag, attrs):
1996         self.attrs = dict(attrs)
1997
1998
1999 def extract_attributes(html_element):
2000     """Given a string for an HTML element such as
2001     <el
2002          a="foo" B="bar" c="&98;az" d=boz
2003          empty= noval entity="&amp;"
2004          sq='"' dq="'"
2005     >
2006     Decode and return a dictionary of attributes.
2007     {
2008         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2009         'empty': '', 'noval': None, 'entity': '&',
2010         'sq': '"', 'dq': '\''
2011     }.
2012     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2013     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2014     """
2015     parser = HTMLAttributeParser()
2016     try:
2017         parser.feed(html_element)
2018         parser.close()
2019     # Older Python may throw HTMLParseError in case of malformed HTML
2020     except compat_HTMLParseError:
2021         pass
2022     return parser.attrs
2023
2024
2025 def clean_html(html):
2026     """Clean an HTML snippet into a readable string"""
2027
2028     if html is None:  # Convenience for sanitizing descriptions etc.
2029         return html
2030
2031     # Newline vs <br />
2032     html = html.replace('\n', ' ')
2033     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2034     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2035     # Strip html tags
2036     html = re.sub('<.*?>', '', html)
2037     # Replace html entities
2038     html = unescapeHTML(html)
2039     return html.strip()
2040
2041
2042 def sanitize_open(filename, open_mode):
2043     """Try to open the given filename, and slightly tweak it if this fails.
2044
2045     Attempts to open the given filename. If this fails, it tries to change
2046     the filename slightly, step by step, until it's either able to open it
2047     or it fails and raises a final exception, like the standard open()
2048     function.
2049
2050     It returns the tuple (stream, definitive_file_name).
2051     """
2052     try:
2053         if filename == '-':
2054             if sys.platform == 'win32':
2055                 import msvcrt
2056                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2057             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2058         stream = open(encodeFilename(filename), open_mode)
2059         return (stream, filename)
2060     except (IOError, OSError) as err:
2061         if err.errno in (errno.EACCES,):
2062             raise
2063
2064         # In case of error, try to remove win32 forbidden chars
2065         alt_filename = sanitize_path(filename)
2066         if alt_filename == filename:
2067             raise
2068         else:
2069             # An exception here should be caught in the caller
2070             stream = open(encodeFilename(alt_filename), open_mode)
2071             return (stream, alt_filename)
2072
2073
2074 def timeconvert(timestr):
2075     """Convert RFC 2822 defined time string into system timestamp"""
2076     timestamp = None
2077     timetuple = email.utils.parsedate_tz(timestr)
2078     if timetuple is not None:
2079         timestamp = email.utils.mktime_tz(timetuple)
2080     return timestamp
2081
2082
2083 def sanitize_filename(s, restricted=False, is_id=False):
2084     """Sanitizes a string so it could be used as part of a filename.
2085     If restricted is set, use a stricter subset of allowed characters.
2086     Set is_id if this is not an arbitrary string, but an ID that should be kept
2087     if possible.
2088     """
2089     def replace_insane(char):
2090         if restricted and char in ACCENT_CHARS:
2091             return ACCENT_CHARS[char]
2092         if char == '?' or ord(char) < 32 or ord(char) == 127:
2093             return ''
2094         elif char == '"':
2095             return '' if restricted else '\''
2096         elif char == ':':
2097             return '_-' if restricted else ' -'
2098         elif char in '\\/|*<>':
2099             return '_'
2100         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2101             return '_'
2102         if restricted and ord(char) > 127:
2103             return '_'
2104         return char
2105
2106     # Handle timestamps
2107     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
2108     result = ''.join(map(replace_insane, s))
2109     if not is_id:
2110         while '__' in result:
2111             result = result.replace('__', '_')
2112         result = result.strip('_')
2113         # Common case of "Foreign band name - English song title"
2114         if restricted and result.startswith('-_'):
2115             result = result[2:]
2116         if result.startswith('-'):
2117             result = '_' + result[len('-'):]
2118         result = result.lstrip('.')
2119         if not result:
2120             result = '_'
2121     return result
2122
2123
2124 def sanitize_path(s):
2125     """Sanitizes and normalizes path on Windows"""
2126     if sys.platform != 'win32':
2127         return s
2128     drive_or_unc, _ = os.path.splitdrive(s)
2129     if sys.version_info < (2, 7) and not drive_or_unc:
2130         drive_or_unc, _ = os.path.splitunc(s)
2131     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2132     if drive_or_unc:
2133         norm_path.pop(0)
2134     sanitized_path = [
2135         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
2136         for path_part in norm_path]
2137     if drive_or_unc:
2138         sanitized_path.insert(0, drive_or_unc + os.path.sep)
2139     return os.path.join(*sanitized_path)
2140
2141
2142 def sanitize_url(url):
2143     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2144     # the number of unwanted failures due to missing protocol
2145     if url.startswith('//'):
2146         return 'http:%s' % url
2147     # Fix some common typos seen so far
2148     COMMON_TYPOS = (
2149         # https://github.com/ytdl-org/youtube-dl/issues/15649
2150         (r'^httpss://', r'https://'),
2151         # https://bx1.be/lives/direct-tv/
2152         (r'^rmtp([es]?)://', r'rtmp\1://'),
2153     )
2154     for mistake, fixup in COMMON_TYPOS:
2155         if re.match(mistake, url):
2156             return re.sub(mistake, fixup, url)
2157     return url
2158
2159
2160 def sanitized_Request(url, *args, **kwargs):
2161     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2162
2163
2164 def expand_path(s):
2165     """Expand shell variables and ~"""
2166     return os.path.expandvars(compat_expanduser(s))
2167
2168
2169 def orderedSet(iterable):
2170     """ Remove all duplicates from the input iterable """
2171     res = []
2172     for el in iterable:
2173         if el not in res:
2174             res.append(el)
2175     return res
2176
2177
2178 def _htmlentity_transform(entity_with_semicolon):
2179     """Transforms an HTML entity to a character."""
2180     entity = entity_with_semicolon[:-1]
2181
2182     # Known non-numeric HTML entity
2183     if entity in compat_html_entities.name2codepoint:
2184         return compat_chr(compat_html_entities.name2codepoint[entity])
2185
2186     # TODO: HTML5 allows entities without a semicolon. For example,
2187     # '&Eacuteric' should be decoded as 'Éric'.
2188     if entity_with_semicolon in compat_html_entities_html5:
2189         return compat_html_entities_html5[entity_with_semicolon]
2190
2191     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2192     if mobj is not None:
2193         numstr = mobj.group(1)
2194         if numstr.startswith('x'):
2195             base = 16
2196             numstr = '0%s' % numstr
2197         else:
2198             base = 10
2199         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2200         try:
2201             return compat_chr(int(numstr, base))
2202         except ValueError:
2203             pass
2204
2205     # Unknown entity in name, return its literal representation
2206     return '&%s;' % entity
2207
2208
2209 def unescapeHTML(s):
2210     if s is None:
2211         return None
2212     assert type(s) == compat_str
2213
2214     return re.sub(
2215         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2216
2217
2218 def get_subprocess_encoding():
2219     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2220         # For subprocess calls, encode with locale encoding
2221         # Refer to http://stackoverflow.com/a/9951851/35070
2222         encoding = preferredencoding()
2223     else:
2224         encoding = sys.getfilesystemencoding()
2225     if encoding is None:
2226         encoding = 'utf-8'
2227     return encoding
2228
2229
2230 def encodeFilename(s, for_subprocess=False):
2231     """
2232     @param s The name of the file
2233     """
2234
2235     assert type(s) == compat_str
2236
2237     # Python 3 has a Unicode API
2238     if sys.version_info >= (3, 0):
2239         return s
2240
2241     # Pass '' directly to use Unicode APIs on Windows 2000 and up
2242     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2243     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2244     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2245         return s
2246
2247     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2248     if sys.platform.startswith('java'):
2249         return s
2250
2251     return s.encode(get_subprocess_encoding(), 'ignore')
2252
2253
2254 def decodeFilename(b, for_subprocess=False):
2255
2256     if sys.version_info >= (3, 0):
2257         return b
2258
2259     if not isinstance(b, bytes):
2260         return b
2261
2262     return b.decode(get_subprocess_encoding(), 'ignore')
2263
2264
2265 def encodeArgument(s):
2266     if not isinstance(s, compat_str):
2267         # Legacy code that uses byte strings
2268         # Uncomment the following line after fixing all post processors
2269         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2270         s = s.decode('ascii')
2271     return encodeFilename(s, True)
2272
2273
2274 def decodeArgument(b):
2275     return decodeFilename(b, True)
2276
2277
2278 def decodeOption(optval):
2279     if optval is None:
2280         return optval
2281     if isinstance(optval, bytes):
2282         optval = optval.decode(preferredencoding())
2283
2284     assert isinstance(optval, compat_str)
2285     return optval
2286
2287
2288 def formatSeconds(secs):
2289     if secs > 3600:
2290         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
2291     elif secs > 60:
2292         return '%d:%02d' % (secs // 60, secs % 60)
2293     else:
2294         return '%d' % secs
2295
2296
2297 def make_HTTPS_handler(params, **kwargs):
2298     opts_no_check_certificate = params.get('nocheckcertificate', False)
2299     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
2300         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
2301         if opts_no_check_certificate:
2302             context.check_hostname = False
2303             context.verify_mode = ssl.CERT_NONE
2304         try:
2305             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2306         except TypeError:
2307             # Python 2.7.8
2308             # (create_default_context present but HTTPSHandler has no context=)
2309             pass
2310
2311     if sys.version_info < (3, 2):
2312         return YoutubeDLHTTPSHandler(params, **kwargs)
2313     else:  # Python < 3.4
2314         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
2315         context.verify_mode = (ssl.CERT_NONE
2316                                if opts_no_check_certificate
2317                                else ssl.CERT_REQUIRED)
2318         context.set_default_verify_paths()
2319         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2320
2321
2322 def bug_reports_message():
2323     if ytdl_is_updateable():
2324         update_cmd = 'type  youtube-dlc -U  to update'
2325     else:
2326         update_cmd = 'see  https://github.com/blackjack4494/yt-dlc  on how to update'
2327     msg = '; please report this issue on https://github.com/blackjack4494/yt-dlc .'
2328     msg += ' Make sure you are using the latest version; %s.' % update_cmd
2329     msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.'
2330     return msg
2331
2332
2333 class YoutubeDLError(Exception):
2334     """Base exception for YoutubeDL errors."""
2335     pass
2336
2337
2338 class ExtractorError(YoutubeDLError):
2339     """Error during info extraction."""
2340
2341     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
2342         """ tb, if given, is the original traceback (so that it can be printed out).
2343         If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
2344         """
2345
2346         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
2347             expected = True
2348         if video_id is not None:
2349             msg = video_id + ': ' + msg
2350         if cause:
2351             msg += ' (caused by %r)' % cause
2352         if not expected:
2353             msg += bug_reports_message()
2354         super(ExtractorError, self).__init__(msg)
2355
2356         self.traceback = tb
2357         self.exc_info = sys.exc_info()  # preserve original exception
2358         self.cause = cause
2359         self.video_id = video_id
2360
2361     def format_traceback(self):
2362         if self.traceback is None:
2363             return None
2364         return ''.join(traceback.format_tb(self.traceback))
2365
2366
2367 class UnsupportedError(ExtractorError):
2368     def __init__(self, url):
2369         super(UnsupportedError, self).__init__(
2370             'Unsupported URL: %s' % url, expected=True)
2371         self.url = url
2372
2373
2374 class RegexNotFoundError(ExtractorError):
2375     """Error when a regex didn't match"""
2376     pass
2377
2378
2379 class GeoRestrictedError(ExtractorError):
2380     """Geographic restriction Error exception.
2381
2382     This exception may be thrown when a video is not available from your
2383     geographic location due to geographic restrictions imposed by a website.
2384     """
2385
2386     def __init__(self, msg, countries=None):
2387         super(GeoRestrictedError, self).__init__(msg, expected=True)
2388         self.msg = msg
2389         self.countries = countries
2390
2391
2392 class DownloadError(YoutubeDLError):
2393     """Download Error exception.
2394
2395     This exception may be thrown by FileDownloader objects if they are not
2396     configured to continue on errors. They will contain the appropriate
2397     error message.
2398     """
2399
2400     def __init__(self, msg, exc_info=None):
2401         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2402         super(DownloadError, self).__init__(msg)
2403         self.exc_info = exc_info
2404
2405
2406 class SameFileError(YoutubeDLError):
2407     """Same File exception.
2408
2409     This exception will be thrown by FileDownloader objects if they detect
2410     multiple files would have to be downloaded to the same file on disk.
2411     """
2412     pass
2413
2414
2415 class PostProcessingError(YoutubeDLError):
2416     """Post Processing exception.
2417
2418     This exception may be raised by PostProcessor's .run() method to
2419     indicate an error in the postprocessing task.
2420     """
2421
2422     def __init__(self, msg):
2423         super(PostProcessingError, self).__init__(msg)
2424         self.msg = msg
2425
2426
2427 class MaxDownloadsReached(YoutubeDLError):
2428     """ --max-downloads limit has been reached. """
2429     pass
2430
2431
2432 class UnavailableVideoError(YoutubeDLError):
2433     """Unavailable Format exception.
2434
2435     This exception will be thrown when a video is requested
2436     in a format that is not available for that video.
2437     """
2438     pass
2439
2440
2441 class ContentTooShortError(YoutubeDLError):
2442     """Content Too Short exception.
2443
2444     This exception may be raised by FileDownloader objects when a file they
2445     download is too small for what the server announced first, indicating
2446     the connection was probably interrupted.
2447     """
2448
2449     def __init__(self, downloaded, expected):
2450         super(ContentTooShortError, self).__init__(
2451             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2452         )
2453         # Both in bytes
2454         self.downloaded = downloaded
2455         self.expected = expected
2456
2457
2458 class XAttrMetadataError(YoutubeDLError):
2459     def __init__(self, code=None, msg='Unknown error'):
2460         super(XAttrMetadataError, self).__init__(msg)
2461         self.code = code
2462         self.msg = msg
2463
2464         # Parsing code and msg
2465         if (self.code in (errno.ENOSPC, errno.EDQUOT)
2466                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
2467             self.reason = 'NO_SPACE'
2468         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2469             self.reason = 'VALUE_TOO_LONG'
2470         else:
2471             self.reason = 'NOT_SUPPORTED'
2472
2473
2474 class XAttrUnavailableError(YoutubeDLError):
2475     pass
2476
2477
2478 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
2479     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2480     # expected HTTP responses to meet HTTP/1.0 or later (see also
2481     # https://github.com/ytdl-org/youtube-dl/issues/6727)
2482     if sys.version_info < (3, 0):
2483         kwargs['strict'] = True
2484     hc = http_class(*args, **compat_kwargs(kwargs))
2485     source_address = ydl_handler._params.get('source_address')
2486
2487     if source_address is not None:
2488         # This is to workaround _create_connection() from socket where it will try all
2489         # address data from getaddrinfo() including IPv6. This filters the result from
2490         # getaddrinfo() based on the source_address value.
2491         # This is based on the cpython socket.create_connection() function.
2492         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2493         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2494             host, port = address
2495             err = None
2496             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
2497             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2498             ip_addrs = [addr for addr in addrs if addr[0] == af]
2499             if addrs and not ip_addrs:
2500                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2501                 raise socket.error(
2502                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
2503                     % (ip_version, source_address[0]))
2504             for res in ip_addrs:
2505                 af, socktype, proto, canonname, sa = res
2506                 sock = None
2507                 try:
2508                     sock = socket.socket(af, socktype, proto)
2509                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2510                         sock.settimeout(timeout)
2511                     sock.bind(source_address)
2512                     sock.connect(sa)
2513                     err = None  # Explicitly break reference cycle
2514                     return sock
2515                 except socket.error as _:
2516                     err = _
2517                     if sock is not None:
2518                         sock.close()
2519             if err is not None:
2520                 raise err
2521             else:
2522                 raise socket.error('getaddrinfo returns an empty list')
2523         if hasattr(hc, '_create_connection'):
2524             hc._create_connection = _create_connection
2525         sa = (source_address, 0)
2526         if hasattr(hc, 'source_address'):  # Python 2.7+
2527             hc.source_address = sa
2528         else:  # Python 2.6
2529             def _hc_connect(self, *args, **kwargs):
2530                 sock = _create_connection(
2531                     (self.host, self.port), self.timeout, sa)
2532                 if is_https:
2533                     self.sock = ssl.wrap_socket(
2534                         sock, self.key_file, self.cert_file,
2535                         ssl_version=ssl.PROTOCOL_TLSv1)
2536                 else:
2537                     self.sock = sock
2538             hc.connect = functools.partial(_hc_connect, hc)
2539
2540     return hc
2541
2542
2543 def handle_youtubedl_headers(headers):
2544     filtered_headers = headers
2545
2546     if 'Youtubedl-no-compression' in filtered_headers:
2547         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
2548         del filtered_headers['Youtubedl-no-compression']
2549
2550     return filtered_headers
2551
2552
2553 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2554     """Handler for HTTP requests and responses.
2555
2556     This class, when installed with an OpenerDirector, automatically adds
2557     the standard headers to every HTTP request and handles gzipped and
2558     deflated responses from web servers. If compression is to be avoided in
2559     a particular request, the original request in the program code only has
2560     to include the HTTP header "Youtubedl-no-compression", which will be
2561     removed before making the real request.
2562
2563     Part of this code was copied from:
2564
2565     http://techknack.net/python-urllib2-handlers/
2566
2567     Andrew Rowls, the author of that code, agreed to release it to the
2568     public domain.
2569     """
2570
2571     def __init__(self, params, *args, **kwargs):
2572         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2573         self._params = params
2574
2575     def http_open(self, req):
2576         conn_class = compat_http_client.HTTPConnection
2577
2578         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2579         if socks_proxy:
2580             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2581             del req.headers['Ytdl-socks-proxy']
2582
2583         return self.do_open(functools.partial(
2584             _create_http_connection, self, conn_class, False),
2585             req)
2586
2587     @staticmethod
2588     def deflate(data):
2589         try:
2590             return zlib.decompress(data, -zlib.MAX_WBITS)
2591         except zlib.error:
2592             return zlib.decompress(data)
2593
2594     def http_request(self, req):
2595         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2596         # always respected by websites, some tend to give out URLs with non percent-encoded
2597         # non-ASCII characters (see telemb.py, ard.py [#3412])
2598         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2599         # To work around aforementioned issue we will replace request's original URL with
2600         # percent-encoded one
2601         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2602         # the code of this workaround has been moved here from YoutubeDL.urlopen()
2603         url = req.get_full_url()
2604         url_escaped = escape_url(url)
2605
2606         # Substitute URL if any change after escaping
2607         if url != url_escaped:
2608             req = update_Request(req, url=url_escaped)
2609
2610         for h, v in std_headers.items():
2611             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2612             # The dict keys are capitalized because of this bug by urllib
2613             if h.capitalize() not in req.headers:
2614                 req.add_header(h, v)
2615
2616         req.headers = handle_youtubedl_headers(req.headers)
2617
2618         if sys.version_info < (2, 7) and '#' in req.get_full_url():
2619             # Python 2.6 is brain-dead when it comes to fragments
2620             req._Request__original = req._Request__original.partition('#')[0]
2621             req._Request__r_type = req._Request__r_type.partition('#')[0]
2622
2623         return req
2624
2625     def http_response(self, req, resp):
2626         old_resp = resp
2627         # gzip
2628         if resp.headers.get('Content-encoding', '') == 'gzip':
2629             content = resp.read()
2630             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2631             try:
2632                 uncompressed = io.BytesIO(gz.read())
2633             except IOError as original_ioerror:
2634                 # There may be junk add the end of the file
2635                 # See http://stackoverflow.com/q/4928560/35070 for details
2636                 for i in range(1, 1024):
2637                     try:
2638                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2639                         uncompressed = io.BytesIO(gz.read())
2640                     except IOError:
2641                         continue
2642                     break
2643                 else:
2644                     raise original_ioerror
2645             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2646             resp.msg = old_resp.msg
2647             del resp.headers['Content-encoding']
2648         # deflate
2649         if resp.headers.get('Content-encoding', '') == 'deflate':
2650             gz = io.BytesIO(self.deflate(resp.read()))
2651             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2652             resp.msg = old_resp.msg
2653             del resp.headers['Content-encoding']
2654         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2655         # https://github.com/ytdl-org/youtube-dl/issues/6457).
2656         if 300 <= resp.code < 400:
2657             location = resp.headers.get('Location')
2658             if location:
2659                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2660                 if sys.version_info >= (3, 0):
2661                     location = location.encode('iso-8859-1').decode('utf-8')
2662                 else:
2663                     location = location.decode('utf-8')
2664                 location_escaped = escape_url(location)
2665                 if location != location_escaped:
2666                     del resp.headers['Location']
2667                     if sys.version_info < (3, 0):
2668                         location_escaped = location_escaped.encode('utf-8')
2669                     resp.headers['Location'] = location_escaped
2670         return resp
2671
2672     https_request = http_request
2673     https_response = http_response
2674
2675
2676 def make_socks_conn_class(base_class, socks_proxy):
2677     assert issubclass(base_class, (
2678         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2679
2680     url_components = compat_urlparse.urlparse(socks_proxy)
2681     if url_components.scheme.lower() == 'socks5':
2682         socks_type = ProxyType.SOCKS5
2683     elif url_components.scheme.lower() in ('socks', 'socks4'):
2684         socks_type = ProxyType.SOCKS4
2685     elif url_components.scheme.lower() == 'socks4a':
2686         socks_type = ProxyType.SOCKS4A
2687
2688     def unquote_if_non_empty(s):
2689         if not s:
2690             return s
2691         return compat_urllib_parse_unquote_plus(s)
2692
2693     proxy_args = (
2694         socks_type,
2695         url_components.hostname, url_components.port or 1080,
2696         True,  # Remote DNS
2697         unquote_if_non_empty(url_components.username),
2698         unquote_if_non_empty(url_components.password),
2699     )
2700
2701     class SocksConnection(base_class):
2702         def connect(self):
2703             self.sock = sockssocket()
2704             self.sock.setproxy(*proxy_args)
2705             if type(self.timeout) in (int, float):
2706                 self.sock.settimeout(self.timeout)
2707             self.sock.connect((self.host, self.port))
2708
2709             if isinstance(self, compat_http_client.HTTPSConnection):
2710                 if hasattr(self, '_context'):  # Python > 2.6
2711                     self.sock = self._context.wrap_socket(
2712                         self.sock, server_hostname=self.host)
2713                 else:
2714                     self.sock = ssl.wrap_socket(self.sock)
2715
2716     return SocksConnection
2717
2718
2719 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2720     def __init__(self, params, https_conn_class=None, *args, **kwargs):
2721         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2722         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2723         self._params = params
2724
2725     def https_open(self, req):
2726         kwargs = {}
2727         conn_class = self._https_conn_class
2728
2729         if hasattr(self, '_context'):  # python > 2.6
2730             kwargs['context'] = self._context
2731         if hasattr(self, '_check_hostname'):  # python 3.x
2732             kwargs['check_hostname'] = self._check_hostname
2733
2734         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2735         if socks_proxy:
2736             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2737             del req.headers['Ytdl-socks-proxy']
2738
2739         return self.do_open(functools.partial(
2740             _create_http_connection, self, conn_class, True),
2741             req, **kwargs)
2742
2743
2744 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
2745     """
2746     See [1] for cookie file format.
2747
2748     1. https://curl.haxx.se/docs/http-cookies.html
2749     """
2750     _HTTPONLY_PREFIX = '#HttpOnly_'
2751     _ENTRY_LEN = 7
2752     _HEADER = '''# Netscape HTTP Cookie File
2753 # This file is generated by youtube-dlc.  Do not edit.
2754
2755 '''
2756     _CookieFileEntry = collections.namedtuple(
2757         'CookieFileEntry',
2758         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2759
2760     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
2761         """
2762         Save cookies to a file.
2763
2764         Most of the code is taken from CPython 3.8 and slightly adapted
2765         to support cookie files with UTF-8 in both python 2 and 3.
2766         """
2767         if filename is None:
2768             if self.filename is not None:
2769                 filename = self.filename
2770             else:
2771                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2772
2773         # Store session cookies with `expires` set to 0 instead of an empty
2774         # string
2775         for cookie in self:
2776             if cookie.expires is None:
2777                 cookie.expires = 0
2778
2779         with io.open(filename, 'w', encoding='utf-8') as f:
2780             f.write(self._HEADER)
2781             now = time.time()
2782             for cookie in self:
2783                 if not ignore_discard and cookie.discard:
2784                     continue
2785                 if not ignore_expires and cookie.is_expired(now):
2786                     continue
2787                 if cookie.secure:
2788                     secure = 'TRUE'
2789                 else:
2790                     secure = 'FALSE'
2791                 if cookie.domain.startswith('.'):
2792                     initial_dot = 'TRUE'
2793                 else:
2794                     initial_dot = 'FALSE'
2795                 if cookie.expires is not None:
2796                     expires = compat_str(cookie.expires)
2797                 else:
2798                     expires = ''
2799                 if cookie.value is None:
2800                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
2801                     # with no name, whereas http.cookiejar regards it as a
2802                     # cookie with no value.
2803                     name = ''
2804                     value = cookie.name
2805                 else:
2806                     name = cookie.name
2807                     value = cookie.value
2808                 f.write(
2809                     '\t'.join([cookie.domain, initial_dot, cookie.path,
2810                                secure, expires, name, value]) + '\n')
2811
2812     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
2813         """Load cookies from a file."""
2814         if filename is None:
2815             if self.filename is not None:
2816                 filename = self.filename
2817             else:
2818                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2819
2820         def prepare_line(line):
2821             if line.startswith(self._HTTPONLY_PREFIX):
2822                 line = line[len(self._HTTPONLY_PREFIX):]
2823             # comments and empty lines are fine
2824             if line.startswith('#') or not line.strip():
2825                 return line
2826             cookie_list = line.split('\t')
2827             if len(cookie_list) != self._ENTRY_LEN:
2828                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2829             cookie = self._CookieFileEntry(*cookie_list)
2830             if cookie.expires_at and not cookie.expires_at.isdigit():
2831                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2832             return line
2833
2834         cf = io.StringIO()
2835         with io.open(filename, encoding='utf-8') as f:
2836             for line in f:
2837                 try:
2838                     cf.write(prepare_line(line))
2839                 except compat_cookiejar.LoadError as e:
2840                     write_string(
2841                         'WARNING: skipping cookie file entry due to %s: %r\n'
2842                         % (e, line), sys.stderr)
2843                     continue
2844         cf.seek(0)
2845         self._really_load(cf, filename, ignore_discard, ignore_expires)
2846         # Session cookies are denoted by either `expires` field set to
2847         # an empty string or 0. MozillaCookieJar only recognizes the former
2848         # (see [1]). So we need force the latter to be recognized as session
2849         # cookies on our own.
2850         # Session cookies may be important for cookies-based authentication,
2851         # e.g. usually, when user does not check 'Remember me' check box while
2852         # logging in on a site, some important cookies are stored as session
2853         # cookies so that not recognizing them will result in failed login.
2854         # 1. https://bugs.python.org/issue17164
2855         for cookie in self:
2856             # Treat `expires=0` cookies as session cookies
2857             if cookie.expires == 0:
2858                 cookie.expires = None
2859                 cookie.discard = True
2860
2861
2862 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2863     def __init__(self, cookiejar=None):
2864         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2865
2866     def http_response(self, request, response):
2867         # Python 2 will choke on next HTTP request in row if there are non-ASCII
2868         # characters in Set-Cookie HTTP header of last response (see
2869         # https://github.com/ytdl-org/youtube-dl/issues/6769).
2870         # In order to at least prevent crashing we will percent encode Set-Cookie
2871         # header before HTTPCookieProcessor starts processing it.
2872         # if sys.version_info < (3, 0) and response.headers:
2873         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2874         #         set_cookie = response.headers.get(set_cookie_header)
2875         #         if set_cookie:
2876         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2877         #             if set_cookie != set_cookie_escaped:
2878         #                 del response.headers[set_cookie_header]
2879         #                 response.headers[set_cookie_header] = set_cookie_escaped
2880         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2881
2882     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
2883     https_response = http_response
2884
2885
2886 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
2887     if sys.version_info[0] < 3:
2888         def redirect_request(self, req, fp, code, msg, headers, newurl):
2889             # On python 2 urlh.geturl() may sometimes return redirect URL
2890             # as byte string instead of unicode. This workaround allows
2891             # to force it always return unicode.
2892             return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2893
2894
2895 def extract_timezone(date_str):
2896     m = re.search(
2897         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2898         date_str)
2899     if not m:
2900         timezone = datetime.timedelta()
2901     else:
2902         date_str = date_str[:-len(m.group('tz'))]
2903         if not m.group('sign'):
2904             timezone = datetime.timedelta()
2905         else:
2906             sign = 1 if m.group('sign') == '+' else -1
2907             timezone = datetime.timedelta(
2908                 hours=sign * int(m.group('hours')),
2909                 minutes=sign * int(m.group('minutes')))
2910     return timezone, date_str
2911
2912
2913 def parse_iso8601(date_str, delimiter='T', timezone=None):
2914     """ Return a UNIX timestamp from the given date """
2915
2916     if date_str is None:
2917         return None
2918
2919     date_str = re.sub(r'\.[0-9]+', '', date_str)
2920
2921     if timezone is None:
2922         timezone, date_str = extract_timezone(date_str)
2923
2924     try:
2925         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
2926         dt = datetime.datetime.strptime(date_str, date_format) - timezone
2927         return calendar.timegm(dt.timetuple())
2928     except ValueError:
2929         pass
2930
2931
2932 def date_formats(day_first=True):
2933     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
2934
2935
2936 def unified_strdate(date_str, day_first=True):
2937     """Return a string with the date in the format YYYYMMDD"""
2938
2939     if date_str is None:
2940         return None
2941     upload_date = None
2942     # Replace commas
2943     date_str = date_str.replace(',', ' ')
2944     # Remove AM/PM + timezone
2945     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2946     _, date_str = extract_timezone(date_str)
2947
2948     for expression in date_formats(day_first):
2949         try:
2950             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
2951         except ValueError:
2952             pass
2953     if upload_date is None:
2954         timetuple = email.utils.parsedate_tz(date_str)
2955         if timetuple:
2956             try:
2957                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
2958             except ValueError:
2959                 pass
2960     if upload_date is not None:
2961         return compat_str(upload_date)
2962
2963
2964 def unified_timestamp(date_str, day_first=True):
2965     if date_str is None:
2966         return None
2967
2968     date_str = re.sub(r'[,|]', '', date_str)
2969
2970     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
2971     timezone, date_str = extract_timezone(date_str)
2972
2973     # Remove AM/PM + timezone
2974     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2975
2976     # Remove unrecognized timezones from ISO 8601 alike timestamps
2977     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
2978     if m:
2979         date_str = date_str[:-len(m.group('tz'))]
2980
2981     # Python only supports microseconds, so remove nanoseconds
2982     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
2983     if m:
2984         date_str = m.group(1)
2985
2986     for expression in date_formats(day_first):
2987         try:
2988             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
2989             return calendar.timegm(dt.timetuple())
2990         except ValueError:
2991             pass
2992     timetuple = email.utils.parsedate_tz(date_str)
2993     if timetuple:
2994         return calendar.timegm(timetuple) + pm_delta * 3600
2995
2996
2997 def determine_ext(url, default_ext='unknown_video'):
2998     if url is None or '.' not in url:
2999         return default_ext
3000     guess = url.partition('?')[0].rpartition('.')[2]
3001     if re.match(r'^[A-Za-z0-9]+$', guess):
3002         return guess
3003     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3004     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
3005         return guess.rstrip('/')
3006     else:
3007         return default_ext
3008
3009
3010 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3011     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3012
3013
3014 def date_from_str(date_str):
3015     """
3016     Return a datetime object from a string in the format YYYYMMDD or
3017     (now|today)[+-][0-9](day|week|month|year)(s)?"""
3018     today = datetime.date.today()
3019     if date_str in ('now', 'today'):
3020         return today
3021     if date_str == 'yesterday':
3022         return today - datetime.timedelta(days=1)
3023     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
3024     if match is not None:
3025         sign = match.group('sign')
3026         time = int(match.group('time'))
3027         if sign == '-':
3028             time = -time
3029         unit = match.group('unit')
3030         # A bad approximation?
3031         if unit == 'month':
3032             unit = 'day'
3033             time *= 30
3034         elif unit == 'year':
3035             unit = 'day'
3036             time *= 365
3037         unit += 's'
3038         delta = datetime.timedelta(**{unit: time})
3039         return today + delta
3040     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
3041
3042
3043 def hyphenate_date(date_str):
3044     """
3045     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3046     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3047     if match is not None:
3048         return '-'.join(match.groups())
3049     else:
3050         return date_str
3051
3052
3053 class DateRange(object):
3054     """Represents a time interval between two dates"""
3055
3056     def __init__(self, start=None, end=None):
3057         """start and end must be strings in the format accepted by date"""
3058         if start is not None:
3059             self.start = date_from_str(start)
3060         else:
3061             self.start = datetime.datetime.min.date()
3062         if end is not None:
3063             self.end = date_from_str(end)
3064         else:
3065             self.end = datetime.datetime.max.date()
3066         if self.start > self.end:
3067             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
3068
3069     @classmethod
3070     def day(cls, day):
3071         """Returns a range that only contains the given day"""
3072         return cls(day, day)
3073
3074     def __contains__(self, date):
3075         """Check if the date is in the range"""
3076         if not isinstance(date, datetime.date):
3077             date = date_from_str(date)
3078         return self.start <= date <= self.end
3079
3080     def __str__(self):
3081         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3082
3083
3084 def platform_name():
3085     """ Returns the platform name as a compat_str """
3086     res = platform.platform()
3087     if isinstance(res, bytes):
3088         res = res.decode(preferredencoding())
3089
3090     assert isinstance(res, compat_str)
3091     return res
3092
3093
3094 def _windows_write_string(s, out):
3095     """ Returns True if the string was written using special methods,
3096     False if it has yet to be written out."""
3097     # Adapted from http://stackoverflow.com/a/3259271/35070
3098
3099     import ctypes
3100     import ctypes.wintypes
3101
3102     WIN_OUTPUT_IDS = {
3103         1: -11,
3104         2: -12,
3105     }
3106
3107     try:
3108         fileno = out.fileno()
3109     except AttributeError:
3110         # If the output stream doesn't have a fileno, it's virtual
3111         return False
3112     except io.UnsupportedOperation:
3113         # Some strange Windows pseudo files?
3114         return False
3115     if fileno not in WIN_OUTPUT_IDS:
3116         return False
3117
3118     GetStdHandle = compat_ctypes_WINFUNCTYPE(
3119         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
3120         ('GetStdHandle', ctypes.windll.kernel32))
3121     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3122
3123     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
3124         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3125         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
3126         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
3127     written = ctypes.wintypes.DWORD(0)
3128
3129     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
3130     FILE_TYPE_CHAR = 0x0002
3131     FILE_TYPE_REMOTE = 0x8000
3132     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
3133         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3134         ctypes.POINTER(ctypes.wintypes.DWORD))(
3135         ('GetConsoleMode', ctypes.windll.kernel32))
3136     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3137
3138     def not_a_console(handle):
3139         if handle == INVALID_HANDLE_VALUE or handle is None:
3140             return True
3141         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3142                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
3143
3144     if not_a_console(h):
3145         return False
3146
3147     def next_nonbmp_pos(s):
3148         try:
3149             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3150         except StopIteration:
3151             return len(s)
3152
3153     while s:
3154         count = min(next_nonbmp_pos(s), 1024)
3155
3156         ret = WriteConsoleW(
3157             h, s, count if count else 2, ctypes.byref(written), None)
3158         if ret == 0:
3159             raise OSError('Failed to write string')
3160         if not count:  # We just wrote a non-BMP character
3161             assert written.value == 2
3162             s = s[1:]
3163         else:
3164             assert written.value > 0
3165             s = s[written.value:]
3166     return True
3167
3168
3169 def write_string(s, out=None, encoding=None):
3170     if out is None:
3171         out = sys.stderr
3172     assert type(s) == compat_str
3173
3174     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3175         if _windows_write_string(s, out):
3176             return
3177
3178     if ('b' in getattr(out, 'mode', '')
3179             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
3180         byt = s.encode(encoding or preferredencoding(), 'ignore')
3181         out.write(byt)
3182     elif hasattr(out, 'buffer'):
3183         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3184         byt = s.encode(enc, 'ignore')
3185         out.buffer.write(byt)
3186     else:
3187         out.write(s)
3188     out.flush()
3189
3190
3191 def bytes_to_intlist(bs):
3192     if not bs:
3193         return []
3194     if isinstance(bs[0], int):  # Python 3
3195         return list(bs)
3196     else:
3197         return [ord(c) for c in bs]
3198
3199
3200 def intlist_to_bytes(xs):
3201     if not xs:
3202         return b''
3203     return compat_struct_pack('%dB' % len(xs), *xs)
3204
3205
3206 # Cross-platform file locking
3207 if sys.platform == 'win32':
3208     import ctypes.wintypes
3209     import msvcrt
3210
3211     class OVERLAPPED(ctypes.Structure):
3212         _fields_ = [
3213             ('Internal', ctypes.wintypes.LPVOID),
3214             ('InternalHigh', ctypes.wintypes.LPVOID),
3215             ('Offset', ctypes.wintypes.DWORD),
3216             ('OffsetHigh', ctypes.wintypes.DWORD),
3217             ('hEvent', ctypes.wintypes.HANDLE),
3218         ]
3219
3220     kernel32 = ctypes.windll.kernel32
3221     LockFileEx = kernel32.LockFileEx
3222     LockFileEx.argtypes = [
3223         ctypes.wintypes.HANDLE,     # hFile
3224         ctypes.wintypes.DWORD,      # dwFlags
3225         ctypes.wintypes.DWORD,      # dwReserved
3226         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3227         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3228         ctypes.POINTER(OVERLAPPED)  # Overlapped
3229     ]
3230     LockFileEx.restype = ctypes.wintypes.BOOL
3231     UnlockFileEx = kernel32.UnlockFileEx
3232     UnlockFileEx.argtypes = [
3233         ctypes.wintypes.HANDLE,     # hFile
3234         ctypes.wintypes.DWORD,      # dwReserved
3235         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3236         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3237         ctypes.POINTER(OVERLAPPED)  # Overlapped
3238     ]
3239     UnlockFileEx.restype = ctypes.wintypes.BOOL
3240     whole_low = 0xffffffff
3241     whole_high = 0x7fffffff
3242
3243     def _lock_file(f, exclusive):
3244         overlapped = OVERLAPPED()
3245         overlapped.Offset = 0
3246         overlapped.OffsetHigh = 0
3247         overlapped.hEvent = 0
3248         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3249         handle = msvcrt.get_osfhandle(f.fileno())
3250         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3251                           whole_low, whole_high, f._lock_file_overlapped_p):
3252             raise OSError('Locking file failed: %r' % ctypes.FormatError())
3253
3254     def _unlock_file(f):
3255         assert f._lock_file_overlapped_p
3256         handle = msvcrt.get_osfhandle(f.fileno())
3257         if not UnlockFileEx(handle, 0,
3258                             whole_low, whole_high, f._lock_file_overlapped_p):
3259             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3260
3261 else:
3262     # Some platforms, such as Jython, is missing fcntl
3263     try:
3264         import fcntl
3265
3266         def _lock_file(f, exclusive):
3267             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
3268
3269         def _unlock_file(f):
3270             fcntl.flock(f, fcntl.LOCK_UN)
3271     except ImportError:
3272         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3273
3274         def _lock_file(f, exclusive):
3275             raise IOError(UNSUPPORTED_MSG)
3276
3277         def _unlock_file(f):
3278             raise IOError(UNSUPPORTED_MSG)
3279
3280
3281 class locked_file(object):
3282     def __init__(self, filename, mode, encoding=None):
3283         assert mode in ['r', 'a', 'w']
3284         self.f = io.open(filename, mode, encoding=encoding)
3285         self.mode = mode
3286
3287     def __enter__(self):
3288         exclusive = self.mode != 'r'
3289         try:
3290             _lock_file(self.f, exclusive)
3291         except IOError:
3292             self.f.close()
3293             raise
3294         return self
3295
3296     def __exit__(self, etype, value, traceback):
3297         try:
3298             _unlock_file(self.f)
3299         finally:
3300             self.f.close()
3301
3302     def __iter__(self):
3303         return iter(self.f)
3304
3305     def write(self, *args):
3306         return self.f.write(*args)
3307
3308     def read(self, *args):
3309         return self.f.read(*args)
3310
3311
3312 def get_filesystem_encoding():
3313     encoding = sys.getfilesystemencoding()
3314     return encoding if encoding is not None else 'utf-8'
3315
3316
3317 def shell_quote(args):
3318     quoted_args = []
3319     encoding = get_filesystem_encoding()
3320     for a in args:
3321         if isinstance(a, bytes):
3322             # We may get a filename encoded with 'encodeFilename'
3323             a = a.decode(encoding)
3324         quoted_args.append(compat_shlex_quote(a))
3325     return ' '.join(quoted_args)
3326
3327
3328 def smuggle_url(url, data):
3329     """ Pass additional data in a URL for internal use. """
3330
3331     url, idata = unsmuggle_url(url, {})
3332     data.update(idata)
3333     sdata = compat_urllib_parse_urlencode(
3334         {'__youtubedl_smuggle': json.dumps(data)})
3335     return url + '#' + sdata
3336
3337
3338 def unsmuggle_url(smug_url, default=None):
3339     if '#__youtubedl_smuggle' not in smug_url:
3340         return smug_url, default
3341     url, _, sdata = smug_url.rpartition('#')
3342     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
3343     data = json.loads(jsond)
3344     return url, data
3345
3346
3347 def format_bytes(bytes):
3348     if bytes is None:
3349         return 'N/A'
3350     if type(bytes) is str:
3351         bytes = float(bytes)
3352     if bytes == 0.0:
3353         exponent = 0
3354     else:
3355         exponent = int(math.log(bytes, 1024.0))
3356     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
3357     converted = float(bytes) / float(1024 ** exponent)
3358     return '%.2f%s' % (converted, suffix)
3359
3360
3361 def lookup_unit_table(unit_table, s):
3362     units_re = '|'.join(re.escape(u) for u in unit_table)
3363     m = re.match(
3364         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3365     if not m:
3366         return None
3367     num_str = m.group('num').replace(',', '.')
3368     mult = unit_table[m.group('unit')]
3369     return int(float(num_str) * mult)
3370
3371
3372 def parse_filesize(s):
3373     if s is None:
3374         return None
3375
3376     # The lower-case forms are of course incorrect and unofficial,
3377     # but we support those too
3378     _UNIT_TABLE = {
3379         'B': 1,
3380         'b': 1,
3381         'bytes': 1,
3382         'KiB': 1024,
3383         'KB': 1000,
3384         'kB': 1024,
3385         'Kb': 1000,
3386         'kb': 1000,
3387         'kilobytes': 1000,
3388         'kibibytes': 1024,
3389         'MiB': 1024 ** 2,
3390         'MB': 1000 ** 2,
3391         'mB': 1024 ** 2,
3392         'Mb': 1000 ** 2,
3393         'mb': 1000 ** 2,
3394         'megabytes': 1000 ** 2,
3395         'mebibytes': 1024 ** 2,
3396         'GiB': 1024 ** 3,
3397         'GB': 1000 ** 3,
3398         'gB': 1024 ** 3,
3399         'Gb': 1000 ** 3,
3400         'gb': 1000 ** 3,
3401         'gigabytes': 1000 ** 3,
3402         'gibibytes': 1024 ** 3,
3403         'TiB': 1024 ** 4,
3404         'TB': 1000 ** 4,
3405         'tB': 1024 ** 4,
3406         'Tb': 1000 ** 4,
3407         'tb': 1000 ** 4,
3408         'terabytes': 1000 ** 4,
3409         'tebibytes': 1024 ** 4,
3410         'PiB': 1024 ** 5,
3411         'PB': 1000 ** 5,
3412         'pB': 1024 ** 5,
3413         'Pb': 1000 ** 5,
3414         'pb': 1000 ** 5,
3415         'petabytes': 1000 ** 5,
3416         'pebibytes': 1024 ** 5,
3417         'EiB': 1024 ** 6,
3418         'EB': 1000 ** 6,
3419         'eB': 1024 ** 6,
3420         'Eb': 1000 ** 6,
3421         'eb': 1000 ** 6,
3422         'exabytes': 1000 ** 6,
3423         'exbibytes': 1024 ** 6,
3424         'ZiB': 1024 ** 7,
3425         'ZB': 1000 ** 7,
3426         'zB': 1024 ** 7,
3427         'Zb': 1000 ** 7,
3428         'zb': 1000 ** 7,
3429         'zettabytes': 1000 ** 7,
3430         'zebibytes': 1024 ** 7,
3431         'YiB': 1024 ** 8,
3432         'YB': 1000 ** 8,
3433         'yB': 1024 ** 8,
3434         'Yb': 1000 ** 8,
3435         'yb': 1000 ** 8,
3436         'yottabytes': 1000 ** 8,
3437         'yobibytes': 1024 ** 8,
3438     }
3439
3440     return lookup_unit_table(_UNIT_TABLE, s)
3441
3442
3443 def parse_count(s):
3444     if s is None:
3445         return None
3446
3447     s = s.strip()
3448
3449     if re.match(r'^[\d,.]+$', s):
3450         return str_to_int(s)
3451
3452     _UNIT_TABLE = {
3453         'k': 1000,
3454         'K': 1000,
3455         'm': 1000 ** 2,
3456         'M': 1000 ** 2,
3457         'kk': 1000 ** 2,
3458         'KK': 1000 ** 2,
3459     }
3460
3461     return lookup_unit_table(_UNIT_TABLE, s)
3462
3463
3464 def parse_resolution(s):
3465     if s is None:
3466         return {}
3467
3468     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3469     if mobj:
3470         return {
3471             'width': int(mobj.group('w')),
3472             'height': int(mobj.group('h')),
3473         }
3474
3475     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3476     if mobj:
3477         return {'height': int(mobj.group(1))}
3478
3479     mobj = re.search(r'\b([48])[kK]\b', s)
3480     if mobj:
3481         return {'height': int(mobj.group(1)) * 540}
3482
3483     return {}
3484
3485
3486 def parse_bitrate(s):
3487     if not isinstance(s, compat_str):
3488         return
3489     mobj = re.search(r'\b(\d+)\s*kbps', s)
3490     if mobj:
3491         return int(mobj.group(1))
3492
3493
3494 def month_by_name(name, lang='en'):
3495     """ Return the number of a month by (locale-independently) English name """
3496
3497     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3498
3499     try:
3500         return month_names.index(name) + 1
3501     except ValueError:
3502         return None
3503
3504
3505 def month_by_abbreviation(abbrev):
3506     """ Return the number of a month by (locale-independently) English
3507         abbreviations """
3508
3509     try:
3510         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3511     except ValueError:
3512         return None
3513
3514
3515 def fix_xml_ampersands(xml_str):
3516     """Replace all the '&' by '&amp;' in XML"""
3517     return re.sub(
3518         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3519         '&amp;',
3520         xml_str)
3521
3522
3523 def setproctitle(title):
3524     assert isinstance(title, compat_str)
3525
3526     # ctypes in Jython is not complete
3527     # http://bugs.jython.org/issue2148
3528     if sys.platform.startswith('java'):
3529         return
3530
3531     try:
3532         libc = ctypes.cdll.LoadLibrary('libc.so.6')
3533     except OSError:
3534         return
3535     except TypeError:
3536         # LoadLibrary in Windows Python 2.7.13 only expects
3537         # a bytestring, but since unicode_literals turns
3538         # every string into a unicode string, it fails.
3539         return
3540     title_bytes = title.encode('utf-8')
3541     buf = ctypes.create_string_buffer(len(title_bytes))
3542     buf.value = title_bytes
3543     try:
3544         libc.prctl(15, buf, 0, 0, 0)
3545     except AttributeError:
3546         return  # Strange libc, just skip this
3547
3548
3549 def remove_start(s, start):
3550     return s[len(start):] if s is not None and s.startswith(start) else s
3551
3552
3553 def remove_end(s, end):
3554     return s[:-len(end)] if s is not None and s.endswith(end) else s
3555
3556
3557 def remove_quotes(s):
3558     if s is None or len(s) < 2:
3559         return s
3560     for quote in ('"', "'", ):
3561         if s[0] == quote and s[-1] == quote:
3562             return s[1:-1]
3563     return s
3564
3565
3566 def get_domain(url):
3567     domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3568     return domain.group('domain') if domain else None
3569
3570
3571 def url_basename(url):
3572     path = compat_urlparse.urlparse(url).path
3573     return path.strip('/').split('/')[-1]
3574
3575
3576 def base_url(url):
3577     return re.match(r'https?://[^?#&]+/', url).group()
3578
3579
3580 def urljoin(base, path):
3581     if isinstance(path, bytes):
3582         path = path.decode('utf-8')
3583     if not isinstance(path, compat_str) or not path:
3584         return None
3585     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3586         return path
3587     if isinstance(base, bytes):
3588         base = base.decode('utf-8')
3589     if not isinstance(base, compat_str) or not re.match(
3590             r'^(?:https?:)?//', base):
3591         return None
3592     return compat_urlparse.urljoin(base, path)
3593
3594
3595 class HEADRequest(compat_urllib_request.Request):
3596     def get_method(self):
3597         return 'HEAD'
3598
3599
3600 class PUTRequest(compat_urllib_request.Request):
3601     def get_method(self):
3602         return 'PUT'
3603
3604
3605 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3606     if get_attr:
3607         if v is not None:
3608             v = getattr(v, get_attr, None)
3609     if v == '':
3610         v = None
3611     if v is None:
3612         return default
3613     try:
3614         return int(v) * invscale // scale
3615     except (ValueError, TypeError):
3616         return default
3617
3618
3619 def str_or_none(v, default=None):
3620     return default if v is None else compat_str(v)
3621
3622
3623 def str_to_int(int_str):
3624     """ A more relaxed version of int_or_none """
3625     if isinstance(int_str, compat_integer_types):
3626         return int_str
3627     elif isinstance(int_str, compat_str):
3628         int_str = re.sub(r'[,\.\+]', '', int_str)
3629         return int_or_none(int_str)
3630
3631
3632 def float_or_none(v, scale=1, invscale=1, default=None):
3633     if v is None:
3634         return default
3635     try:
3636         return float(v) * invscale / scale
3637     except (ValueError, TypeError):
3638         return default
3639
3640
3641 def bool_or_none(v, default=None):
3642     return v if isinstance(v, bool) else default
3643
3644
3645 def strip_or_none(v, default=None):
3646     return v.strip() if isinstance(v, compat_str) else default
3647
3648
3649 def url_or_none(url):
3650     if not url or not isinstance(url, compat_str):
3651         return None
3652     url = url.strip()
3653     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
3654
3655
3656 def parse_duration(s):
3657     if not isinstance(s, compat_basestring):
3658         return None
3659
3660     s = s.strip()
3661
3662     days, hours, mins, secs, ms = [None] * 5
3663     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3664     if m:
3665         days, hours, mins, secs, ms = m.groups()
3666     else:
3667         m = re.match(
3668             r'''(?ix)(?:P?
3669                 (?:
3670                     [0-9]+\s*y(?:ears?)?\s*
3671                 )?
3672                 (?:
3673                     [0-9]+\s*m(?:onths?)?\s*
3674                 )?
3675                 (?:
3676                     [0-9]+\s*w(?:eeks?)?\s*
3677                 )?
3678                 (?:
3679                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3680                 )?
3681                 T)?
3682                 (?:
3683                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3684                 )?
3685                 (?:
3686                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3687                 )?
3688                 (?:
3689                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3690                 )?Z?$''', s)
3691         if m:
3692             days, hours, mins, secs, ms = m.groups()
3693         else:
3694             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3695             if m:
3696                 hours, mins = m.groups()
3697             else:
3698                 return None
3699
3700     duration = 0
3701     if secs:
3702         duration += float(secs)
3703     if mins:
3704         duration += float(mins) * 60
3705     if hours:
3706         duration += float(hours) * 60 * 60
3707     if days:
3708         duration += float(days) * 24 * 60 * 60
3709     if ms:
3710         duration += float(ms)
3711     return duration
3712
3713
3714 def prepend_extension(filename, ext, expected_real_ext=None):
3715     name, real_ext = os.path.splitext(filename)
3716     return (
3717         '{0}.{1}{2}'.format(name, ext, real_ext)
3718         if not expected_real_ext or real_ext[1:] == expected_real_ext
3719         else '{0}.{1}'.format(filename, ext))
3720
3721
3722 def replace_extension(filename, ext, expected_real_ext=None):
3723     name, real_ext = os.path.splitext(filename)
3724     return '{0}.{1}'.format(
3725         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3726         ext)
3727
3728
3729 def check_executable(exe, args=[]):
3730     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3731     args can be a list of arguments for a short output (like -version) """
3732     try:
3733         subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
3734     except OSError:
3735         return False
3736     return exe
3737
3738
3739 def get_exe_version(exe, args=['--version'],
3740                     version_re=None, unrecognized='present'):
3741     """ Returns the version of the specified executable,
3742     or False if the executable is not present """
3743     try:
3744         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3745         # SIGTTOU if youtube-dlc is run in the background.
3746         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3747         out, _ = subprocess.Popen(
3748             [encodeArgument(exe)] + args,
3749             stdin=subprocess.PIPE,
3750             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
3751     except OSError:
3752         return False
3753     if isinstance(out, bytes):  # Python 2.x
3754         out = out.decode('ascii', 'ignore')
3755     return detect_exe_version(out, version_re, unrecognized)
3756
3757
3758 def detect_exe_version(output, version_re=None, unrecognized='present'):
3759     assert isinstance(output, compat_str)
3760     if version_re is None:
3761         version_re = r'version\s+([-0-9._a-zA-Z]+)'
3762     m = re.search(version_re, output)
3763     if m:
3764         return m.group(1)
3765     else:
3766         return unrecognized
3767
3768
3769 class PagedList(object):
3770     def __len__(self):
3771         # This is only useful for tests
3772         return len(self.getslice())
3773
3774
3775 class OnDemandPagedList(PagedList):
3776     def __init__(self, pagefunc, pagesize, use_cache=True):
3777         self._pagefunc = pagefunc
3778         self._pagesize = pagesize
3779         self._use_cache = use_cache
3780         if use_cache:
3781             self._cache = {}
3782
3783     def getslice(self, start=0, end=None):
3784         res = []
3785         for pagenum in itertools.count(start // self._pagesize):
3786             firstid = pagenum * self._pagesize
3787             nextfirstid = pagenum * self._pagesize + self._pagesize
3788             if start >= nextfirstid:
3789                 continue
3790
3791             page_results = None
3792             if self._use_cache:
3793                 page_results = self._cache.get(pagenum)
3794             if page_results is None:
3795                 page_results = list(self._pagefunc(pagenum))
3796             if self._use_cache:
3797                 self._cache[pagenum] = page_results
3798
3799             startv = (
3800                 start % self._pagesize
3801                 if firstid <= start < nextfirstid
3802                 else 0)
3803
3804             endv = (
3805                 ((end - 1) % self._pagesize) + 1
3806                 if (end is not None and firstid <= end <= nextfirstid)
3807                 else None)
3808
3809             if startv != 0 or endv is not None:
3810                 page_results = page_results[startv:endv]
3811             res.extend(page_results)
3812
3813             # A little optimization - if current page is not "full", ie. does
3814             # not contain page_size videos then we can assume that this page
3815             # is the last one - there are no more ids on further pages -
3816             # i.e. no need to query again.
3817             if len(page_results) + startv < self._pagesize:
3818                 break
3819
3820             # If we got the whole page, but the next page is not interesting,
3821             # break out early as well
3822             if end == nextfirstid:
3823                 break
3824         return res
3825
3826
3827 class InAdvancePagedList(PagedList):
3828     def __init__(self, pagefunc, pagecount, pagesize):
3829         self._pagefunc = pagefunc
3830         self._pagecount = pagecount
3831         self._pagesize = pagesize
3832
3833     def getslice(self, start=0, end=None):
3834         res = []
3835         start_page = start // self._pagesize
3836         end_page = (
3837             self._pagecount if end is None else (end // self._pagesize + 1))
3838         skip_elems = start - start_page * self._pagesize
3839         only_more = None if end is None else end - start
3840         for pagenum in range(start_page, end_page):
3841             page = list(self._pagefunc(pagenum))
3842             if skip_elems:
3843                 page = page[skip_elems:]
3844                 skip_elems = None
3845             if only_more is not None:
3846                 if len(page) < only_more:
3847                     only_more -= len(page)
3848                 else:
3849                     page = page[:only_more]
3850                     res.extend(page)
3851                     break
3852             res.extend(page)
3853         return res
3854
3855
3856 def uppercase_escape(s):
3857     unicode_escape = codecs.getdecoder('unicode_escape')
3858     return re.sub(
3859         r'\\U[0-9a-fA-F]{8}',
3860         lambda m: unicode_escape(m.group(0))[0],
3861         s)
3862
3863
3864 def lowercase_escape(s):
3865     unicode_escape = codecs.getdecoder('unicode_escape')
3866     return re.sub(
3867         r'\\u[0-9a-fA-F]{4}',
3868         lambda m: unicode_escape(m.group(0))[0],
3869         s)
3870
3871
3872 def escape_rfc3986(s):
3873     """Escape non-ASCII characters as suggested by RFC 3986"""
3874     if sys.version_info < (3, 0) and isinstance(s, compat_str):
3875         s = s.encode('utf-8')
3876     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3877
3878
3879 def escape_url(url):
3880     """Escape URL as suggested by RFC 3986"""
3881     url_parsed = compat_urllib_parse_urlparse(url)
3882     return url_parsed._replace(
3883         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3884         path=escape_rfc3986(url_parsed.path),
3885         params=escape_rfc3986(url_parsed.params),
3886         query=escape_rfc3986(url_parsed.query),
3887         fragment=escape_rfc3986(url_parsed.fragment)
3888     ).geturl()
3889
3890
3891 def read_batch_urls(batch_fd):
3892     def fixup(url):
3893         if not isinstance(url, compat_str):
3894             url = url.decode('utf-8', 'replace')
3895         BOM_UTF8 = '\xef\xbb\xbf'
3896         if url.startswith(BOM_UTF8):
3897             url = url[len(BOM_UTF8):]
3898         url = url.strip()
3899         if url.startswith(('#', ';', ']')):
3900             return False
3901         return url
3902
3903     with contextlib.closing(batch_fd) as fd:
3904         return [url for url in map(fixup, fd) if url]
3905
3906
3907 def urlencode_postdata(*args, **kargs):
3908     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3909
3910
3911 def update_url_query(url, query):
3912     if not query:
3913         return url
3914     parsed_url = compat_urlparse.urlparse(url)
3915     qs = compat_parse_qs(parsed_url.query)
3916     qs.update(query)
3917     return compat_urlparse.urlunparse(parsed_url._replace(
3918         query=compat_urllib_parse_urlencode(qs, True)))
3919
3920
3921 def update_Request(req, url=None, data=None, headers={}, query={}):
3922     req_headers = req.headers.copy()
3923     req_headers.update(headers)
3924     req_data = data or req.data
3925     req_url = update_url_query(url or req.get_full_url(), query)
3926     req_get_method = req.get_method()
3927     if req_get_method == 'HEAD':
3928         req_type = HEADRequest
3929     elif req_get_method == 'PUT':
3930         req_type = PUTRequest
3931     else:
3932         req_type = compat_urllib_request.Request
3933     new_req = req_type(
3934         req_url, data=req_data, headers=req_headers,
3935         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3936     if hasattr(req, 'timeout'):
3937         new_req.timeout = req.timeout
3938     return new_req
3939
3940
3941 def _multipart_encode_impl(data, boundary):
3942     content_type = 'multipart/form-data; boundary=%s' % boundary
3943
3944     out = b''
3945     for k, v in data.items():
3946         out += b'--' + boundary.encode('ascii') + b'\r\n'
3947         if isinstance(k, compat_str):
3948             k = k.encode('utf-8')
3949         if isinstance(v, compat_str):
3950             v = v.encode('utf-8')
3951         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3952         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3953         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3954         if boundary.encode('ascii') in content:
3955             raise ValueError('Boundary overlaps with data')
3956         out += content
3957
3958     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3959
3960     return out, content_type
3961
3962
3963 def multipart_encode(data, boundary=None):
3964     '''
3965     Encode a dict to RFC 7578-compliant form-data
3966
3967     data:
3968         A dict where keys and values can be either Unicode or bytes-like
3969         objects.
3970     boundary:
3971         If specified a Unicode object, it's used as the boundary. Otherwise
3972         a random boundary is generated.
3973
3974     Reference: https://tools.ietf.org/html/rfc7578
3975     '''
3976     has_specified_boundary = boundary is not None
3977
3978     while True:
3979         if boundary is None:
3980             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
3981
3982         try:
3983             out, content_type = _multipart_encode_impl(data, boundary)
3984             break
3985         except ValueError:
3986             if has_specified_boundary:
3987                 raise
3988             boundary = None
3989
3990     return out, content_type
3991
3992
3993 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
3994     if isinstance(key_or_keys, (list, tuple)):
3995         for key in key_or_keys:
3996             if key not in d or d[key] is None or skip_false_values and not d[key]:
3997                 continue
3998             return d[key]
3999         return default
4000     return d.get(key_or_keys, default)
4001
4002
4003 def try_get(src, getter, expected_type=None):
4004     if not isinstance(getter, (list, tuple)):
4005         getter = [getter]
4006     for get in getter:
4007         try:
4008             v = get(src)
4009         except (AttributeError, KeyError, TypeError, IndexError):
4010             pass
4011         else:
4012             if expected_type is None or isinstance(v, expected_type):
4013                 return v
4014
4015
4016 def merge_dicts(*dicts):
4017     merged = {}
4018     for a_dict in dicts:
4019         for k, v in a_dict.items():
4020             if v is None:
4021                 continue
4022             if (k not in merged
4023                     or (isinstance(v, compat_str) and v
4024                         and isinstance(merged[k], compat_str)
4025                         and not merged[k])):
4026                 merged[k] = v
4027     return merged
4028
4029
4030 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4031     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4032
4033
4034 US_RATINGS = {
4035     'G': 0,
4036     'PG': 10,
4037     'PG-13': 13,
4038     'R': 16,
4039     'NC': 18,
4040 }
4041
4042
4043 TV_PARENTAL_GUIDELINES = {
4044     'TV-Y': 0,
4045     'TV-Y7': 7,
4046     'TV-G': 0,
4047     'TV-PG': 0,
4048     'TV-14': 14,
4049     'TV-MA': 17,
4050 }
4051
4052
4053 def parse_age_limit(s):
4054     if type(s) == int:
4055         return s if 0 <= s <= 21 else None
4056     if not isinstance(s, compat_basestring):
4057         return None
4058     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4059     if m:
4060         return int(m.group('age'))
4061     if s in US_RATINGS:
4062         return US_RATINGS[s]
4063     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
4064     if m:
4065         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
4066     return None
4067
4068
4069 def strip_jsonp(code):
4070     return re.sub(
4071         r'''(?sx)^
4072             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4073             (?:\s*&&\s*(?P=func_name))?
4074             \s*\(\s*(?P<callback_data>.*)\);?
4075             \s*?(?://[^\n]*)*$''',
4076         r'\g<callback_data>', code)
4077
4078
4079 def js_to_json(code):
4080     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4081     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4082     INTEGER_TABLE = (
4083         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4084         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4085     )
4086
4087     def fix_kv(m):
4088         v = m.group(0)
4089         if v in ('true', 'false', 'null'):
4090             return v
4091         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
4092             return ""
4093
4094         if v[0] in ("'", '"'):
4095             v = re.sub(r'(?s)\\.|"', lambda m: {
4096                 '"': '\\"',
4097                 "\\'": "'",
4098                 '\\\n': '',
4099                 '\\x': '\\u00',
4100             }.get(m.group(0), m.group(0)), v[1:-1])
4101         else:
4102             for regex, base in INTEGER_TABLE:
4103                 im = re.match(regex, v)
4104                 if im:
4105                     i = int(im.group(1), base)
4106                     return '"%d":' % i if v.endswith(':') else '%d' % i
4107
4108         return '"%s"' % v
4109
4110     return re.sub(r'''(?sx)
4111         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4112         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4113         {comment}|,(?={skip}[\]}}])|
4114         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4115         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4116         [0-9]+(?={skip}:)|
4117         !+
4118         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4119
4120
4121 def qualities(quality_ids):
4122     """ Get a numeric quality value out of a list of possible values """
4123     def q(qid):
4124         try:
4125             return quality_ids.index(qid)
4126         except ValueError:
4127             return -1
4128     return q
4129
4130
4131 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4132
4133
4134 def limit_length(s, length):
4135     """ Add ellipses to overly long strings """
4136     if s is None:
4137         return None
4138     ELLIPSES = '...'
4139     if len(s) > length:
4140         return s[:length - len(ELLIPSES)] + ELLIPSES
4141     return s
4142
4143
4144 def version_tuple(v):
4145     return tuple(int(e) for e in re.split(r'[-.]', v))
4146
4147
4148 def is_outdated_version(version, limit, assume_new=True):
4149     if not version:
4150         return not assume_new
4151     try:
4152         return version_tuple(version) < version_tuple(limit)
4153     except ValueError:
4154         return not assume_new
4155
4156
4157 def ytdl_is_updateable():
4158     """ Returns if youtube-dlc can be updated with -U """
4159     from zipimport import zipimporter
4160
4161     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4162
4163
4164 def args_to_str(args):
4165     # Get a short string representation for a subprocess command
4166     return ' '.join(compat_shlex_quote(a) for a in args)
4167
4168
4169 def error_to_compat_str(err):
4170     err_str = str(err)
4171     # On python 2 error byte string must be decoded with proper
4172     # encoding rather than ascii
4173     if sys.version_info[0] < 3:
4174         err_str = err_str.decode(preferredencoding())
4175     return err_str
4176
4177
4178 def mimetype2ext(mt):
4179     if mt is None:
4180         return None
4181
4182     ext = {
4183         'audio/mp4': 'm4a',
4184         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4185         # it's the most popular one
4186         'audio/mpeg': 'mp3',
4187         'audio/x-wav': 'wav',
4188     }.get(mt)
4189     if ext is not None:
4190         return ext
4191
4192     _, _, res = mt.rpartition('/')
4193     res = res.split(';')[0].strip().lower()
4194
4195     return {
4196         '3gpp': '3gp',
4197         'smptett+xml': 'tt',
4198         'ttaf+xml': 'dfxp',
4199         'ttml+xml': 'ttml',
4200         'x-flv': 'flv',
4201         'x-mp4-fragmented': 'mp4',
4202         'x-ms-sami': 'sami',
4203         'x-ms-wmv': 'wmv',
4204         'mpegurl': 'm3u8',
4205         'x-mpegurl': 'm3u8',
4206         'vnd.apple.mpegurl': 'm3u8',
4207         'dash+xml': 'mpd',
4208         'f4m+xml': 'f4m',
4209         'hds+xml': 'f4m',
4210         'vnd.ms-sstr+xml': 'ism',
4211         'quicktime': 'mov',
4212         'mp2t': 'ts',
4213         'x-wav': 'wav',
4214     }.get(res, res)
4215
4216
4217 def parse_codecs(codecs_str):
4218     # http://tools.ietf.org/html/rfc6381
4219     if not codecs_str:
4220         return {}
4221     split_codecs = list(filter(None, map(
4222         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4223     vcodec, acodec = None, None
4224     for full_codec in split_codecs:
4225         codec = full_codec.split('.')[0]
4226         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4227             if not vcodec:
4228                 vcodec = full_codec
4229         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4230             if not acodec:
4231                 acodec = full_codec
4232         else:
4233             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4234     if not vcodec and not acodec:
4235         if len(split_codecs) == 2:
4236             return {
4237                 'vcodec': split_codecs[0],
4238                 'acodec': split_codecs[1],
4239             }
4240     else:
4241         return {
4242             'vcodec': vcodec or 'none',
4243             'acodec': acodec or 'none',
4244         }
4245     return {}
4246
4247
4248 def urlhandle_detect_ext(url_handle):
4249     getheader = url_handle.headers.get
4250
4251     cd = getheader('Content-Disposition')
4252     if cd:
4253         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4254         if m:
4255             e = determine_ext(m.group('filename'), default_ext=None)
4256             if e:
4257                 return e
4258
4259     return mimetype2ext(getheader('Content-Type'))
4260
4261
4262 def encode_data_uri(data, mime_type):
4263     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4264
4265
4266 def age_restricted(content_limit, age_limit):
4267     """ Returns True iff the content should be blocked """
4268
4269     if age_limit is None:  # No limit set
4270         return False
4271     if content_limit is None:
4272         return False  # Content available for everyone
4273     return age_limit < content_limit
4274
4275
4276 def is_html(first_bytes):
4277     """ Detect whether a file contains HTML by examining its first bytes. """
4278
4279     BOMS = [
4280         (b'\xef\xbb\xbf', 'utf-8'),
4281         (b'\x00\x00\xfe\xff', 'utf-32-be'),
4282         (b'\xff\xfe\x00\x00', 'utf-32-le'),
4283         (b'\xff\xfe', 'utf-16-le'),
4284         (b'\xfe\xff', 'utf-16-be'),
4285     ]
4286     for bom, enc in BOMS:
4287         if first_bytes.startswith(bom):
4288             s = first_bytes[len(bom):].decode(enc, 'replace')
4289             break
4290     else:
4291         s = first_bytes.decode('utf-8', 'replace')
4292
4293     return re.match(r'^\s*<', s)
4294
4295
4296 def determine_protocol(info_dict):
4297     protocol = info_dict.get('protocol')
4298     if protocol is not None:
4299         return protocol
4300
4301     url = info_dict['url']
4302     if url.startswith('rtmp'):
4303         return 'rtmp'
4304     elif url.startswith('mms'):
4305         return 'mms'
4306     elif url.startswith('rtsp'):
4307         return 'rtsp'
4308
4309     ext = determine_ext(url)
4310     if ext == 'm3u8':
4311         return 'm3u8'
4312     elif ext == 'f4m':
4313         return 'f4m'
4314
4315     return compat_urllib_parse_urlparse(url).scheme
4316
4317
4318 def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
4319     """ Render a list of rows, each as a list of values """
4320
4321     def get_max_lens(table):
4322         return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4323
4324     def filter_using_list(row, filterArray):
4325         return [col for (take, col) in zip(filterArray, row) if take]
4326
4327     if hideEmpty:
4328         max_lens = get_max_lens(data)
4329         header_row = filter_using_list(header_row, max_lens)
4330         data = [filter_using_list(row, max_lens) for row in data]
4331
4332     table = [header_row] + data
4333     max_lens = get_max_lens(table)
4334     if delim:
4335         table = [header_row] + [['-' * ml for ml in max_lens]] + data
4336     format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
4337     return '\n'.join(format_str % tuple(row) for row in table)
4338
4339
4340 def _match_one(filter_part, dct):
4341     COMPARISON_OPERATORS = {
4342         '<': operator.lt,
4343         '<=': operator.le,
4344         '>': operator.gt,
4345         '>=': operator.ge,
4346         '=': operator.eq,
4347         '!=': operator.ne,
4348     }
4349     operator_rex = re.compile(r'''(?x)\s*
4350         (?P<key>[a-z_]+)
4351         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4352         (?:
4353             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4354             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4355             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4356         )
4357         \s*$
4358         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4359     m = operator_rex.search(filter_part)
4360     if m:
4361         op = COMPARISON_OPERATORS[m.group('op')]
4362         actual_value = dct.get(m.group('key'))
4363         if (m.group('quotedstrval') is not None
4364             or m.group('strval') is not None
4365             # If the original field is a string and matching comparisonvalue is
4366             # a number we should respect the origin of the original field
4367             # and process comparison value as a string (see
4368             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4369             or actual_value is not None and m.group('intval') is not None
4370                 and isinstance(actual_value, compat_str)):
4371             if m.group('op') not in ('=', '!='):
4372                 raise ValueError(
4373                     'Operator %s does not support string values!' % m.group('op'))
4374             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4375             quote = m.group('quote')
4376             if quote is not None:
4377                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4378         else:
4379             try:
4380                 comparison_value = int(m.group('intval'))
4381             except ValueError:
4382                 comparison_value = parse_filesize(m.group('intval'))
4383                 if comparison_value is None:
4384                     comparison_value = parse_filesize(m.group('intval') + 'B')
4385                 if comparison_value is None:
4386                     raise ValueError(
4387                         'Invalid integer value %r in filter part %r' % (
4388                             m.group('intval'), filter_part))
4389         if actual_value is None:
4390             return m.group('none_inclusive')
4391         return op(actual_value, comparison_value)
4392
4393     UNARY_OPERATORS = {
4394         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4395         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4396     }
4397     operator_rex = re.compile(r'''(?x)\s*
4398         (?P<op>%s)\s*(?P<key>[a-z_]+)
4399         \s*$
4400         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4401     m = operator_rex.search(filter_part)
4402     if m:
4403         op = UNARY_OPERATORS[m.group('op')]
4404         actual_value = dct.get(m.group('key'))
4405         return op(actual_value)
4406
4407     raise ValueError('Invalid filter part %r' % filter_part)
4408
4409
4410 def match_str(filter_str, dct):
4411     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4412
4413     return all(
4414         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4415
4416
4417 def match_filter_func(filter_str):
4418     def _match_func(info_dict):
4419         if match_str(filter_str, info_dict):
4420             return None
4421         else:
4422             video_title = info_dict.get('title', info_dict.get('id', 'video'))
4423             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4424     return _match_func
4425
4426
4427 def parse_dfxp_time_expr(time_expr):
4428     if not time_expr:
4429         return
4430
4431     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4432     if mobj:
4433         return float(mobj.group('time_offset'))
4434
4435     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4436     if mobj:
4437         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
4438
4439
4440 def srt_subtitles_timecode(seconds):
4441     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4442
4443
4444 def dfxp2srt(dfxp_data):
4445     '''
4446     @param dfxp_data A bytes-like object containing DFXP data
4447     @returns A unicode object containing converted SRT data
4448     '''
4449     LEGACY_NAMESPACES = (
4450         (b'http://www.w3.org/ns/ttml', [
4451             b'http://www.w3.org/2004/11/ttaf1',
4452             b'http://www.w3.org/2006/04/ttaf1',
4453             b'http://www.w3.org/2006/10/ttaf1',
4454         ]),
4455         (b'http://www.w3.org/ns/ttml#styling', [
4456             b'http://www.w3.org/ns/ttml#style',
4457         ]),
4458     )
4459
4460     SUPPORTED_STYLING = [
4461         'color',
4462         'fontFamily',
4463         'fontSize',
4464         'fontStyle',
4465         'fontWeight',
4466         'textDecoration'
4467     ]
4468
4469     _x = functools.partial(xpath_with_ns, ns_map={
4470         'xml': 'http://www.w3.org/XML/1998/namespace',
4471         'ttml': 'http://www.w3.org/ns/ttml',
4472         'tts': 'http://www.w3.org/ns/ttml#styling',
4473     })
4474
4475     styles = {}
4476     default_style = {}
4477
4478     class TTMLPElementParser(object):
4479         _out = ''
4480         _unclosed_elements = []
4481         _applied_styles = []
4482
4483         def start(self, tag, attrib):
4484             if tag in (_x('ttml:br'), 'br'):
4485                 self._out += '\n'
4486             else:
4487                 unclosed_elements = []
4488                 style = {}
4489                 element_style_id = attrib.get('style')
4490                 if default_style:
4491                     style.update(default_style)
4492                 if element_style_id:
4493                     style.update(styles.get(element_style_id, {}))
4494                 for prop in SUPPORTED_STYLING:
4495                     prop_val = attrib.get(_x('tts:' + prop))
4496                     if prop_val:
4497                         style[prop] = prop_val
4498                 if style:
4499                     font = ''
4500                     for k, v in sorted(style.items()):
4501                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
4502                             continue
4503                         if k == 'color':
4504                             font += ' color="%s"' % v
4505                         elif k == 'fontSize':
4506                             font += ' size="%s"' % v
4507                         elif k == 'fontFamily':
4508                             font += ' face="%s"' % v
4509                         elif k == 'fontWeight' and v == 'bold':
4510                             self._out += '<b>'
4511                             unclosed_elements.append('b')
4512                         elif k == 'fontStyle' and v == 'italic':
4513                             self._out += '<i>'
4514                             unclosed_elements.append('i')
4515                         elif k == 'textDecoration' and v == 'underline':
4516                             self._out += '<u>'
4517                             unclosed_elements.append('u')
4518                     if font:
4519                         self._out += '<font' + font + '>'
4520                         unclosed_elements.append('font')
4521                     applied_style = {}
4522                     if self._applied_styles:
4523                         applied_style.update(self._applied_styles[-1])
4524                     applied_style.update(style)
4525                     self._applied_styles.append(applied_style)
4526                 self._unclosed_elements.append(unclosed_elements)
4527
4528         def end(self, tag):
4529             if tag not in (_x('ttml:br'), 'br'):
4530                 unclosed_elements = self._unclosed_elements.pop()
4531                 for element in reversed(unclosed_elements):
4532                     self._out += '</%s>' % element
4533                 if unclosed_elements and self._applied_styles:
4534                     self._applied_styles.pop()
4535
4536         def data(self, data):
4537             self._out += data
4538
4539         def close(self):
4540             return self._out.strip()
4541
4542     def parse_node(node):
4543         target = TTMLPElementParser()
4544         parser = xml.etree.ElementTree.XMLParser(target=target)
4545         parser.feed(xml.etree.ElementTree.tostring(node))
4546         return parser.close()
4547
4548     for k, v in LEGACY_NAMESPACES:
4549         for ns in v:
4550             dfxp_data = dfxp_data.replace(ns, k)
4551
4552     dfxp = compat_etree_fromstring(dfxp_data)
4553     out = []
4554     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4555
4556     if not paras:
4557         raise ValueError('Invalid dfxp/TTML subtitle')
4558
4559     repeat = False
4560     while True:
4561         for style in dfxp.findall(_x('.//ttml:style')):
4562             style_id = style.get('id') or style.get(_x('xml:id'))
4563             if not style_id:
4564                 continue
4565             parent_style_id = style.get('style')
4566             if parent_style_id:
4567                 if parent_style_id not in styles:
4568                     repeat = True
4569                     continue
4570                 styles[style_id] = styles[parent_style_id].copy()
4571             for prop in SUPPORTED_STYLING:
4572                 prop_val = style.get(_x('tts:' + prop))
4573                 if prop_val:
4574                     styles.setdefault(style_id, {})[prop] = prop_val
4575         if repeat:
4576             repeat = False
4577         else:
4578             break
4579
4580     for p in ('body', 'div'):
4581         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4582         if ele is None:
4583             continue
4584         style = styles.get(ele.get('style'))
4585         if not style:
4586             continue
4587         default_style.update(style)
4588
4589     for para, index in zip(paras, itertools.count(1)):
4590         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4591         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4592         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4593         if begin_time is None:
4594             continue
4595         if not end_time:
4596             if not dur:
4597                 continue
4598             end_time = begin_time + dur
4599         out.append('%d\n%s --> %s\n%s\n\n' % (
4600             index,
4601             srt_subtitles_timecode(begin_time),
4602             srt_subtitles_timecode(end_time),
4603             parse_node(para)))
4604
4605     return ''.join(out)
4606
4607
4608 def cli_option(params, command_option, param):
4609     param = params.get(param)
4610     if param:
4611         param = compat_str(param)
4612     return [command_option, param] if param is not None else []
4613
4614
4615 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4616     param = params.get(param)
4617     if param is None:
4618         return []
4619     assert isinstance(param, bool)
4620     if separator:
4621         return [command_option + separator + (true_value if param else false_value)]
4622     return [command_option, true_value if param else false_value]
4623
4624
4625 def cli_valueless_option(params, command_option, param, expected_value=True):
4626     param = params.get(param)
4627     return [command_option] if param == expected_value else []
4628
4629
4630 def cli_configuration_args(params, param, default=[]):
4631     ex_args = params.get(param)
4632     if ex_args is None:
4633         return default
4634     assert isinstance(ex_args, list)
4635     return ex_args
4636
4637
4638 class ISO639Utils(object):
4639     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4640     _lang_map = {
4641         'aa': 'aar',
4642         'ab': 'abk',
4643         'ae': 'ave',
4644         'af': 'afr',
4645         'ak': 'aka',
4646         'am': 'amh',
4647         'an': 'arg',
4648         'ar': 'ara',
4649         'as': 'asm',
4650         'av': 'ava',
4651         'ay': 'aym',
4652         'az': 'aze',
4653         'ba': 'bak',
4654         'be': 'bel',
4655         'bg': 'bul',
4656         'bh': 'bih',
4657         'bi': 'bis',
4658         'bm': 'bam',
4659         'bn': 'ben',
4660         'bo': 'bod',
4661         'br': 'bre',
4662         'bs': 'bos',
4663         'ca': 'cat',
4664         'ce': 'che',
4665         'ch': 'cha',
4666         'co': 'cos',
4667         'cr': 'cre',
4668         'cs': 'ces',
4669         'cu': 'chu',
4670         'cv': 'chv',
4671         'cy': 'cym',
4672         'da': 'dan',
4673         'de': 'deu',
4674         'dv': 'div',
4675         'dz': 'dzo',
4676         'ee': 'ewe',
4677         'el': 'ell',
4678         'en': 'eng',
4679         'eo': 'epo',
4680         'es': 'spa',
4681         'et': 'est',
4682         'eu': 'eus',
4683         'fa': 'fas',
4684         'ff': 'ful',
4685         'fi': 'fin',
4686         'fj': 'fij',
4687         'fo': 'fao',
4688         'fr': 'fra',
4689         'fy': 'fry',
4690         'ga': 'gle',
4691         'gd': 'gla',
4692         'gl': 'glg',
4693         'gn': 'grn',
4694         'gu': 'guj',
4695         'gv': 'glv',
4696         'ha': 'hau',
4697         'he': 'heb',
4698         'iw': 'heb',  # Replaced by he in 1989 revision
4699         'hi': 'hin',
4700         'ho': 'hmo',
4701         'hr': 'hrv',
4702         'ht': 'hat',
4703         'hu': 'hun',
4704         'hy': 'hye',
4705         'hz': 'her',
4706         'ia': 'ina',
4707         'id': 'ind',
4708         'in': 'ind',  # Replaced by id in 1989 revision
4709         'ie': 'ile',
4710         'ig': 'ibo',
4711         'ii': 'iii',
4712         'ik': 'ipk',
4713         'io': 'ido',
4714         'is': 'isl',
4715         'it': 'ita',
4716         'iu': 'iku',
4717         'ja': 'jpn',
4718         'jv': 'jav',
4719         'ka': 'kat',
4720         'kg': 'kon',
4721         'ki': 'kik',
4722         'kj': 'kua',
4723         'kk': 'kaz',
4724         'kl': 'kal',
4725         'km': 'khm',
4726         'kn': 'kan',
4727         'ko': 'kor',
4728         'kr': 'kau',
4729         'ks': 'kas',
4730         'ku': 'kur',
4731         'kv': 'kom',
4732         'kw': 'cor',
4733         'ky': 'kir',
4734         'la': 'lat',
4735         'lb': 'ltz',
4736         'lg': 'lug',
4737         'li': 'lim',
4738         'ln': 'lin',
4739         'lo': 'lao',
4740         'lt': 'lit',
4741         'lu': 'lub',
4742         'lv': 'lav',
4743         'mg': 'mlg',
4744         'mh': 'mah',
4745         'mi': 'mri',
4746         'mk': 'mkd',
4747         'ml': 'mal',
4748         'mn': 'mon',
4749         'mr': 'mar',
4750         'ms': 'msa',
4751         'mt': 'mlt',
4752         'my': 'mya',
4753         'na': 'nau',
4754         'nb': 'nob',
4755         'nd': 'nde',
4756         'ne': 'nep',
4757         'ng': 'ndo',
4758         'nl': 'nld',
4759         'nn': 'nno',
4760         'no': 'nor',
4761         'nr': 'nbl',
4762         'nv': 'nav',
4763         'ny': 'nya',
4764         'oc': 'oci',
4765         'oj': 'oji',
4766         'om': 'orm',
4767         'or': 'ori',
4768         'os': 'oss',
4769         'pa': 'pan',
4770         'pi': 'pli',
4771         'pl': 'pol',
4772         'ps': 'pus',
4773         'pt': 'por',
4774         'qu': 'que',
4775         'rm': 'roh',
4776         'rn': 'run',
4777         'ro': 'ron',
4778         'ru': 'rus',
4779         'rw': 'kin',
4780         'sa': 'san',
4781         'sc': 'srd',
4782         'sd': 'snd',
4783         'se': 'sme',
4784         'sg': 'sag',
4785         'si': 'sin',
4786         'sk': 'slk',
4787         'sl': 'slv',
4788         'sm': 'smo',
4789         'sn': 'sna',
4790         'so': 'som',
4791         'sq': 'sqi',
4792         'sr': 'srp',
4793         'ss': 'ssw',
4794         'st': 'sot',
4795         'su': 'sun',
4796         'sv': 'swe',
4797         'sw': 'swa',
4798         'ta': 'tam',
4799         'te': 'tel',
4800         'tg': 'tgk',
4801         'th': 'tha',
4802         'ti': 'tir',
4803         'tk': 'tuk',
4804         'tl': 'tgl',
4805         'tn': 'tsn',
4806         'to': 'ton',
4807         'tr': 'tur',
4808         'ts': 'tso',
4809         'tt': 'tat',
4810         'tw': 'twi',
4811         'ty': 'tah',
4812         'ug': 'uig',
4813         'uk': 'ukr',
4814         'ur': 'urd',
4815         'uz': 'uzb',
4816         've': 'ven',
4817         'vi': 'vie',
4818         'vo': 'vol',
4819         'wa': 'wln',
4820         'wo': 'wol',
4821         'xh': 'xho',
4822         'yi': 'yid',
4823         'ji': 'yid',  # Replaced by yi in 1989 revision
4824         'yo': 'yor',
4825         'za': 'zha',
4826         'zh': 'zho',
4827         'zu': 'zul',
4828     }
4829
4830     @classmethod
4831     def short2long(cls, code):
4832         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4833         return cls._lang_map.get(code[:2])
4834
4835     @classmethod
4836     def long2short(cls, code):
4837         """Convert language code from ISO 639-2/T to ISO 639-1"""
4838         for short_name, long_name in cls._lang_map.items():
4839             if long_name == code:
4840                 return short_name
4841
4842
4843 class ISO3166Utils(object):
4844     # From http://data.okfn.org/data/core/country-list
4845     _country_map = {
4846         'AF': 'Afghanistan',
4847         'AX': 'Åland Islands',
4848         'AL': 'Albania',
4849         'DZ': 'Algeria',
4850         'AS': 'American Samoa',
4851         'AD': 'Andorra',
4852         'AO': 'Angola',
4853         'AI': 'Anguilla',
4854         'AQ': 'Antarctica',
4855         'AG': 'Antigua and Barbuda',
4856         'AR': 'Argentina',
4857         'AM': 'Armenia',
4858         'AW': 'Aruba',
4859         'AU': 'Australia',
4860         'AT': 'Austria',
4861         'AZ': 'Azerbaijan',
4862         'BS': 'Bahamas',
4863         'BH': 'Bahrain',
4864         'BD': 'Bangladesh',
4865         'BB': 'Barbados',
4866         'BY': 'Belarus',
4867         'BE': 'Belgium',
4868         'BZ': 'Belize',
4869         'BJ': 'Benin',
4870         'BM': 'Bermuda',
4871         'BT': 'Bhutan',
4872         'BO': 'Bolivia, Plurinational State of',
4873         'BQ': 'Bonaire, Sint Eustatius and Saba',
4874         'BA': 'Bosnia and Herzegovina',
4875         'BW': 'Botswana',
4876         'BV': 'Bouvet Island',
4877         'BR': 'Brazil',
4878         'IO': 'British Indian Ocean Territory',
4879         'BN': 'Brunei Darussalam',
4880         'BG': 'Bulgaria',
4881         'BF': 'Burkina Faso',
4882         'BI': 'Burundi',
4883         'KH': 'Cambodia',
4884         'CM': 'Cameroon',
4885         'CA': 'Canada',
4886         'CV': 'Cape Verde',
4887         'KY': 'Cayman Islands',
4888         'CF': 'Central African Republic',
4889         'TD': 'Chad',
4890         'CL': 'Chile',
4891         'CN': 'China',
4892         'CX': 'Christmas Island',
4893         'CC': 'Cocos (Keeling) Islands',
4894         'CO': 'Colombia',
4895         'KM': 'Comoros',
4896         'CG': 'Congo',
4897         'CD': 'Congo, the Democratic Republic of the',
4898         'CK': 'Cook Islands',
4899         'CR': 'Costa Rica',
4900         'CI': 'Côte d\'Ivoire',
4901         'HR': 'Croatia',
4902         'CU': 'Cuba',
4903         'CW': 'Curaçao',
4904         'CY': 'Cyprus',
4905         'CZ': 'Czech Republic',
4906         'DK': 'Denmark',
4907         'DJ': 'Djibouti',
4908         'DM': 'Dominica',
4909         'DO': 'Dominican Republic',
4910         'EC': 'Ecuador',
4911         'EG': 'Egypt',
4912         'SV': 'El Salvador',
4913         'GQ': 'Equatorial Guinea',
4914         'ER': 'Eritrea',
4915         'EE': 'Estonia',
4916         'ET': 'Ethiopia',
4917         'FK': 'Falkland Islands (Malvinas)',
4918         'FO': 'Faroe Islands',
4919         'FJ': 'Fiji',
4920         'FI': 'Finland',
4921         'FR': 'France',
4922         'GF': 'French Guiana',
4923         'PF': 'French Polynesia',
4924         'TF': 'French Southern Territories',
4925         'GA': 'Gabon',
4926         'GM': 'Gambia',
4927         'GE': 'Georgia',
4928         'DE': 'Germany',
4929         'GH': 'Ghana',
4930         'GI': 'Gibraltar',
4931         'GR': 'Greece',
4932         'GL': 'Greenland',
4933         'GD': 'Grenada',
4934         'GP': 'Guadeloupe',
4935         'GU': 'Guam',
4936         'GT': 'Guatemala',
4937         'GG': 'Guernsey',
4938         'GN': 'Guinea',
4939         'GW': 'Guinea-Bissau',
4940         'GY': 'Guyana',
4941         'HT': 'Haiti',
4942         'HM': 'Heard Island and McDonald Islands',
4943         'VA': 'Holy See (Vatican City State)',
4944         'HN': 'Honduras',
4945         'HK': 'Hong Kong',
4946         'HU': 'Hungary',
4947         'IS': 'Iceland',
4948         'IN': 'India',
4949         'ID': 'Indonesia',
4950         'IR': 'Iran, Islamic Republic of',
4951         'IQ': 'Iraq',
4952         'IE': 'Ireland',
4953         'IM': 'Isle of Man',
4954         'IL': 'Israel',
4955         'IT': 'Italy',
4956         'JM': 'Jamaica',
4957         'JP': 'Japan',
4958         'JE': 'Jersey',
4959         'JO': 'Jordan',
4960         'KZ': 'Kazakhstan',
4961         'KE': 'Kenya',
4962         'KI': 'Kiribati',
4963         'KP': 'Korea, Democratic People\'s Republic of',
4964         'KR': 'Korea, Republic of',
4965         'KW': 'Kuwait',
4966         'KG': 'Kyrgyzstan',
4967         'LA': 'Lao People\'s Democratic Republic',
4968         'LV': 'Latvia',
4969         'LB': 'Lebanon',
4970         'LS': 'Lesotho',
4971         'LR': 'Liberia',
4972         'LY': 'Libya',
4973         'LI': 'Liechtenstein',
4974         'LT': 'Lithuania',
4975         'LU': 'Luxembourg',
4976         'MO': 'Macao',
4977         'MK': 'Macedonia, the Former Yugoslav Republic of',
4978         'MG': 'Madagascar',
4979         'MW': 'Malawi',
4980         'MY': 'Malaysia',
4981         'MV': 'Maldives',
4982         'ML': 'Mali',
4983         'MT': 'Malta',
4984         'MH': 'Marshall Islands',
4985         'MQ': 'Martinique',
4986         'MR': 'Mauritania',
4987         'MU': 'Mauritius',
4988         'YT': 'Mayotte',
4989         'MX': 'Mexico',
4990         'FM': 'Micronesia, Federated States of',
4991         'MD': 'Moldova, Republic of',
4992         'MC': 'Monaco',
4993         'MN': 'Mongolia',
4994         'ME': 'Montenegro',
4995         'MS': 'Montserrat',
4996         'MA': 'Morocco',
4997         'MZ': 'Mozambique',
4998         'MM': 'Myanmar',
4999         'NA': 'Namibia',
5000         'NR': 'Nauru',
5001         'NP': 'Nepal',
5002         'NL': 'Netherlands',
5003         'NC': 'New Caledonia',
5004         'NZ': 'New Zealand',
5005         'NI': 'Nicaragua',
5006         'NE': 'Niger',
5007         'NG': 'Nigeria',
5008         'NU': 'Niue',
5009         'NF': 'Norfolk Island',
5010         'MP': 'Northern Mariana Islands',
5011         'NO': 'Norway',
5012         'OM': 'Oman',
5013         'PK': 'Pakistan',
5014         'PW': 'Palau',
5015         'PS': 'Palestine, State of',
5016         'PA': 'Panama',
5017         'PG': 'Papua New Guinea',
5018         'PY': 'Paraguay',
5019         'PE': 'Peru',
5020         'PH': 'Philippines',
5021         'PN': 'Pitcairn',
5022         'PL': 'Poland',
5023         'PT': 'Portugal',
5024         'PR': 'Puerto Rico',
5025         'QA': 'Qatar',
5026         'RE': 'Réunion',
5027         'RO': 'Romania',
5028         'RU': 'Russian Federation',
5029         'RW': 'Rwanda',
5030         'BL': 'Saint Barthélemy',
5031         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5032         'KN': 'Saint Kitts and Nevis',
5033         'LC': 'Saint Lucia',
5034         'MF': 'Saint Martin (French part)',
5035         'PM': 'Saint Pierre and Miquelon',
5036         'VC': 'Saint Vincent and the Grenadines',
5037         'WS': 'Samoa',
5038         'SM': 'San Marino',
5039         'ST': 'Sao Tome and Principe',
5040         'SA': 'Saudi Arabia',
5041         'SN': 'Senegal',
5042         'RS': 'Serbia',
5043         'SC': 'Seychelles',
5044         'SL': 'Sierra Leone',
5045         'SG': 'Singapore',
5046         'SX': 'Sint Maarten (Dutch part)',
5047         'SK': 'Slovakia',
5048         'SI': 'Slovenia',
5049         'SB': 'Solomon Islands',
5050         'SO': 'Somalia',
5051         'ZA': 'South Africa',
5052         'GS': 'South Georgia and the South Sandwich Islands',
5053         'SS': 'South Sudan',
5054         'ES': 'Spain',
5055         'LK': 'Sri Lanka',
5056         'SD': 'Sudan',
5057         'SR': 'Suriname',
5058         'SJ': 'Svalbard and Jan Mayen',
5059         'SZ': 'Swaziland',
5060         'SE': 'Sweden',
5061         'CH': 'Switzerland',
5062         'SY': 'Syrian Arab Republic',
5063         'TW': 'Taiwan, Province of China',
5064         'TJ': 'Tajikistan',
5065         'TZ': 'Tanzania, United Republic of',
5066         'TH': 'Thailand',
5067         'TL': 'Timor-Leste',
5068         'TG': 'Togo',
5069         'TK': 'Tokelau',
5070         'TO': 'Tonga',
5071         'TT': 'Trinidad and Tobago',
5072         'TN': 'Tunisia',
5073         'TR': 'Turkey',
5074         'TM': 'Turkmenistan',
5075         'TC': 'Turks and Caicos Islands',
5076         'TV': 'Tuvalu',
5077         'UG': 'Uganda',
5078         'UA': 'Ukraine',
5079         'AE': 'United Arab Emirates',
5080         'GB': 'United Kingdom',
5081         'US': 'United States',
5082         'UM': 'United States Minor Outlying Islands',
5083         'UY': 'Uruguay',
5084         'UZ': 'Uzbekistan',
5085         'VU': 'Vanuatu',
5086         'VE': 'Venezuela, Bolivarian Republic of',
5087         'VN': 'Viet Nam',
5088         'VG': 'Virgin Islands, British',
5089         'VI': 'Virgin Islands, U.S.',
5090         'WF': 'Wallis and Futuna',
5091         'EH': 'Western Sahara',
5092         'YE': 'Yemen',
5093         'ZM': 'Zambia',
5094         'ZW': 'Zimbabwe',
5095     }
5096
5097     @classmethod
5098     def short2full(cls, code):
5099         """Convert an ISO 3166-2 country code to the corresponding full name"""
5100         return cls._country_map.get(code.upper())
5101
5102
5103 class GeoUtils(object):
5104     # Major IPv4 address blocks per country
5105     _country_ip_map = {
5106         'AD': '46.172.224.0/19',
5107         'AE': '94.200.0.0/13',
5108         'AF': '149.54.0.0/17',
5109         'AG': '209.59.64.0/18',
5110         'AI': '204.14.248.0/21',
5111         'AL': '46.99.0.0/16',
5112         'AM': '46.70.0.0/15',
5113         'AO': '105.168.0.0/13',
5114         'AP': '182.50.184.0/21',
5115         'AQ': '23.154.160.0/24',
5116         'AR': '181.0.0.0/12',
5117         'AS': '202.70.112.0/20',
5118         'AT': '77.116.0.0/14',
5119         'AU': '1.128.0.0/11',
5120         'AW': '181.41.0.0/18',
5121         'AX': '185.217.4.0/22',
5122         'AZ': '5.197.0.0/16',
5123         'BA': '31.176.128.0/17',
5124         'BB': '65.48.128.0/17',
5125         'BD': '114.130.0.0/16',
5126         'BE': '57.0.0.0/8',
5127         'BF': '102.178.0.0/15',
5128         'BG': '95.42.0.0/15',
5129         'BH': '37.131.0.0/17',
5130         'BI': '154.117.192.0/18',
5131         'BJ': '137.255.0.0/16',
5132         'BL': '185.212.72.0/23',
5133         'BM': '196.12.64.0/18',
5134         'BN': '156.31.0.0/16',
5135         'BO': '161.56.0.0/16',
5136         'BQ': '161.0.80.0/20',
5137         'BR': '191.128.0.0/12',
5138         'BS': '24.51.64.0/18',
5139         'BT': '119.2.96.0/19',
5140         'BW': '168.167.0.0/16',
5141         'BY': '178.120.0.0/13',
5142         'BZ': '179.42.192.0/18',
5143         'CA': '99.224.0.0/11',
5144         'CD': '41.243.0.0/16',
5145         'CF': '197.242.176.0/21',
5146         'CG': '160.113.0.0/16',
5147         'CH': '85.0.0.0/13',
5148         'CI': '102.136.0.0/14',
5149         'CK': '202.65.32.0/19',
5150         'CL': '152.172.0.0/14',
5151         'CM': '102.244.0.0/14',
5152         'CN': '36.128.0.0/10',
5153         'CO': '181.240.0.0/12',
5154         'CR': '201.192.0.0/12',
5155         'CU': '152.206.0.0/15',
5156         'CV': '165.90.96.0/19',
5157         'CW': '190.88.128.0/17',
5158         'CY': '31.153.0.0/16',
5159         'CZ': '88.100.0.0/14',
5160         'DE': '53.0.0.0/8',
5161         'DJ': '197.241.0.0/17',
5162         'DK': '87.48.0.0/12',
5163         'DM': '192.243.48.0/20',
5164         'DO': '152.166.0.0/15',
5165         'DZ': '41.96.0.0/12',
5166         'EC': '186.68.0.0/15',
5167         'EE': '90.190.0.0/15',
5168         'EG': '156.160.0.0/11',
5169         'ER': '196.200.96.0/20',
5170         'ES': '88.0.0.0/11',
5171         'ET': '196.188.0.0/14',
5172         'EU': '2.16.0.0/13',
5173         'FI': '91.152.0.0/13',
5174         'FJ': '144.120.0.0/16',
5175         'FK': '80.73.208.0/21',
5176         'FM': '119.252.112.0/20',
5177         'FO': '88.85.32.0/19',
5178         'FR': '90.0.0.0/9',
5179         'GA': '41.158.0.0/15',
5180         'GB': '25.0.0.0/8',
5181         'GD': '74.122.88.0/21',
5182         'GE': '31.146.0.0/16',
5183         'GF': '161.22.64.0/18',
5184         'GG': '62.68.160.0/19',
5185         'GH': '154.160.0.0/12',
5186         'GI': '95.164.0.0/16',
5187         'GL': '88.83.0.0/19',
5188         'GM': '160.182.0.0/15',
5189         'GN': '197.149.192.0/18',
5190         'GP': '104.250.0.0/19',
5191         'GQ': '105.235.224.0/20',
5192         'GR': '94.64.0.0/13',
5193         'GT': '168.234.0.0/16',
5194         'GU': '168.123.0.0/16',
5195         'GW': '197.214.80.0/20',
5196         'GY': '181.41.64.0/18',
5197         'HK': '113.252.0.0/14',
5198         'HN': '181.210.0.0/16',
5199         'HR': '93.136.0.0/13',
5200         'HT': '148.102.128.0/17',
5201         'HU': '84.0.0.0/14',
5202         'ID': '39.192.0.0/10',
5203         'IE': '87.32.0.0/12',
5204         'IL': '79.176.0.0/13',
5205         'IM': '5.62.80.0/20',
5206         'IN': '117.192.0.0/10',
5207         'IO': '203.83.48.0/21',
5208         'IQ': '37.236.0.0/14',
5209         'IR': '2.176.0.0/12',
5210         'IS': '82.221.0.0/16',
5211         'IT': '79.0.0.0/10',
5212         'JE': '87.244.64.0/18',
5213         'JM': '72.27.0.0/17',
5214         'JO': '176.29.0.0/16',
5215         'JP': '133.0.0.0/8',
5216         'KE': '105.48.0.0/12',
5217         'KG': '158.181.128.0/17',
5218         'KH': '36.37.128.0/17',
5219         'KI': '103.25.140.0/22',
5220         'KM': '197.255.224.0/20',
5221         'KN': '198.167.192.0/19',
5222         'KP': '175.45.176.0/22',
5223         'KR': '175.192.0.0/10',
5224         'KW': '37.36.0.0/14',
5225         'KY': '64.96.0.0/15',
5226         'KZ': '2.72.0.0/13',
5227         'LA': '115.84.64.0/18',
5228         'LB': '178.135.0.0/16',
5229         'LC': '24.92.144.0/20',
5230         'LI': '82.117.0.0/19',
5231         'LK': '112.134.0.0/15',
5232         'LR': '102.183.0.0/16',
5233         'LS': '129.232.0.0/17',
5234         'LT': '78.56.0.0/13',
5235         'LU': '188.42.0.0/16',
5236         'LV': '46.109.0.0/16',
5237         'LY': '41.252.0.0/14',
5238         'MA': '105.128.0.0/11',
5239         'MC': '88.209.64.0/18',
5240         'MD': '37.246.0.0/16',
5241         'ME': '178.175.0.0/17',
5242         'MF': '74.112.232.0/21',
5243         'MG': '154.126.0.0/17',
5244         'MH': '117.103.88.0/21',
5245         'MK': '77.28.0.0/15',
5246         'ML': '154.118.128.0/18',
5247         'MM': '37.111.0.0/17',
5248         'MN': '49.0.128.0/17',
5249         'MO': '60.246.0.0/16',
5250         'MP': '202.88.64.0/20',
5251         'MQ': '109.203.224.0/19',
5252         'MR': '41.188.64.0/18',
5253         'MS': '208.90.112.0/22',
5254         'MT': '46.11.0.0/16',
5255         'MU': '105.16.0.0/12',
5256         'MV': '27.114.128.0/18',
5257         'MW': '102.70.0.0/15',
5258         'MX': '187.192.0.0/11',
5259         'MY': '175.136.0.0/13',
5260         'MZ': '197.218.0.0/15',
5261         'NA': '41.182.0.0/16',
5262         'NC': '101.101.0.0/18',
5263         'NE': '197.214.0.0/18',
5264         'NF': '203.17.240.0/22',
5265         'NG': '105.112.0.0/12',
5266         'NI': '186.76.0.0/15',
5267         'NL': '145.96.0.0/11',
5268         'NO': '84.208.0.0/13',
5269         'NP': '36.252.0.0/15',
5270         'NR': '203.98.224.0/19',
5271         'NU': '49.156.48.0/22',
5272         'NZ': '49.224.0.0/14',
5273         'OM': '5.36.0.0/15',
5274         'PA': '186.72.0.0/15',
5275         'PE': '186.160.0.0/14',
5276         'PF': '123.50.64.0/18',
5277         'PG': '124.240.192.0/19',
5278         'PH': '49.144.0.0/13',
5279         'PK': '39.32.0.0/11',
5280         'PL': '83.0.0.0/11',
5281         'PM': '70.36.0.0/20',
5282         'PR': '66.50.0.0/16',
5283         'PS': '188.161.0.0/16',
5284         'PT': '85.240.0.0/13',
5285         'PW': '202.124.224.0/20',
5286         'PY': '181.120.0.0/14',
5287         'QA': '37.210.0.0/15',
5288         'RE': '102.35.0.0/16',
5289         'RO': '79.112.0.0/13',
5290         'RS': '93.86.0.0/15',
5291         'RU': '5.136.0.0/13',
5292         'RW': '41.186.0.0/16',
5293         'SA': '188.48.0.0/13',
5294         'SB': '202.1.160.0/19',
5295         'SC': '154.192.0.0/11',
5296         'SD': '102.120.0.0/13',
5297         'SE': '78.64.0.0/12',
5298         'SG': '8.128.0.0/10',
5299         'SI': '188.196.0.0/14',
5300         'SK': '78.98.0.0/15',
5301         'SL': '102.143.0.0/17',
5302         'SM': '89.186.32.0/19',
5303         'SN': '41.82.0.0/15',
5304         'SO': '154.115.192.0/18',
5305         'SR': '186.179.128.0/17',
5306         'SS': '105.235.208.0/21',
5307         'ST': '197.159.160.0/19',
5308         'SV': '168.243.0.0/16',
5309         'SX': '190.102.0.0/20',
5310         'SY': '5.0.0.0/16',
5311         'SZ': '41.84.224.0/19',
5312         'TC': '65.255.48.0/20',
5313         'TD': '154.68.128.0/19',
5314         'TG': '196.168.0.0/14',
5315         'TH': '171.96.0.0/13',
5316         'TJ': '85.9.128.0/18',
5317         'TK': '27.96.24.0/21',
5318         'TL': '180.189.160.0/20',
5319         'TM': '95.85.96.0/19',
5320         'TN': '197.0.0.0/11',
5321         'TO': '175.176.144.0/21',
5322         'TR': '78.160.0.0/11',
5323         'TT': '186.44.0.0/15',
5324         'TV': '202.2.96.0/19',
5325         'TW': '120.96.0.0/11',
5326         'TZ': '156.156.0.0/14',
5327         'UA': '37.52.0.0/14',
5328         'UG': '102.80.0.0/13',
5329         'US': '6.0.0.0/8',
5330         'UY': '167.56.0.0/13',
5331         'UZ': '84.54.64.0/18',
5332         'VA': '212.77.0.0/19',
5333         'VC': '207.191.240.0/21',
5334         'VE': '186.88.0.0/13',
5335         'VG': '66.81.192.0/20',
5336         'VI': '146.226.0.0/16',
5337         'VN': '14.160.0.0/11',
5338         'VU': '202.80.32.0/20',
5339         'WF': '117.20.32.0/21',
5340         'WS': '202.4.32.0/19',
5341         'YE': '134.35.0.0/16',
5342         'YT': '41.242.116.0/22',
5343         'ZA': '41.0.0.0/11',
5344         'ZM': '102.144.0.0/13',
5345         'ZW': '102.177.192.0/18',
5346     }
5347
5348     @classmethod
5349     def random_ipv4(cls, code_or_block):
5350         if len(code_or_block) == 2:
5351             block = cls._country_ip_map.get(code_or_block.upper())
5352             if not block:
5353                 return None
5354         else:
5355             block = code_or_block
5356         addr, preflen = block.split('/')
5357         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5358         addr_max = addr_min | (0xffffffff >> int(preflen))
5359         return compat_str(socket.inet_ntoa(
5360             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5361
5362
5363 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5364     def __init__(self, proxies=None):
5365         # Set default handlers
5366         for type in ('http', 'https'):
5367             setattr(self, '%s_open' % type,
5368                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5369                         meth(r, proxy, type))
5370         compat_urllib_request.ProxyHandler.__init__(self, proxies)
5371
5372     def proxy_open(self, req, proxy, type):
5373         req_proxy = req.headers.get('Ytdl-request-proxy')
5374         if req_proxy is not None:
5375             proxy = req_proxy
5376             del req.headers['Ytdl-request-proxy']
5377
5378         if proxy == '__noproxy__':
5379             return None  # No Proxy
5380         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5381             req.add_header('Ytdl-socks-proxy', proxy)
5382             # youtube-dlc's http/https handlers do wrapping the socket with socks
5383             return None
5384         return compat_urllib_request.ProxyHandler.proxy_open(
5385             self, req, proxy, type)
5386
5387
5388 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5389 # released into Public Domain
5390 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5391
5392 def long_to_bytes(n, blocksize=0):
5393     """long_to_bytes(n:long, blocksize:int) : string
5394     Convert a long integer to a byte string.
5395
5396     If optional blocksize is given and greater than zero, pad the front of the
5397     byte string with binary zeros so that the length is a multiple of
5398     blocksize.
5399     """
5400     # after much testing, this algorithm was deemed to be the fastest
5401     s = b''
5402     n = int(n)
5403     while n > 0:
5404         s = compat_struct_pack('>I', n & 0xffffffff) + s
5405         n = n >> 32
5406     # strip off leading zeros
5407     for i in range(len(s)):
5408         if s[i] != b'\000'[0]:
5409             break
5410     else:
5411         # only happens when n == 0
5412         s = b'\000'
5413         i = 0
5414     s = s[i:]
5415     # add back some pad bytes.  this could be done more efficiently w.r.t. the
5416     # de-padding being done above, but sigh...
5417     if blocksize > 0 and len(s) % blocksize:
5418         s = (blocksize - len(s) % blocksize) * b'\000' + s
5419     return s
5420
5421
5422 def bytes_to_long(s):
5423     """bytes_to_long(string) : long
5424     Convert a byte string to a long integer.
5425
5426     This is (essentially) the inverse of long_to_bytes().
5427     """
5428     acc = 0
5429     length = len(s)
5430     if length % 4:
5431         extra = (4 - length % 4)
5432         s = b'\000' * extra + s
5433         length = length + extra
5434     for i in range(0, length, 4):
5435         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5436     return acc
5437
5438
5439 def ohdave_rsa_encrypt(data, exponent, modulus):
5440     '''
5441     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5442
5443     Input:
5444         data: data to encrypt, bytes-like object
5445         exponent, modulus: parameter e and N of RSA algorithm, both integer
5446     Output: hex string of encrypted data
5447
5448     Limitation: supports one block encryption only
5449     '''
5450
5451     payload = int(binascii.hexlify(data[::-1]), 16)
5452     encrypted = pow(payload, exponent, modulus)
5453     return '%x' % encrypted
5454
5455
5456 def pkcs1pad(data, length):
5457     """
5458     Padding input data with PKCS#1 scheme
5459
5460     @param {int[]} data        input data
5461     @param {int}   length      target length
5462     @returns {int[]}           padded data
5463     """
5464     if len(data) > length - 11:
5465         raise ValueError('Input data too long for PKCS#1 padding')
5466
5467     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5468     return [0, 2] + pseudo_random + [0] + data
5469
5470
5471 def encode_base_n(num, n, table=None):
5472     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5473     if not table:
5474         table = FULL_TABLE[:n]
5475
5476     if n > len(table):
5477         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5478
5479     if num == 0:
5480         return table[0]
5481
5482     ret = ''
5483     while num:
5484         ret = table[num % n] + ret
5485         num = num // n
5486     return ret
5487
5488
5489 def decode_packed_codes(code):
5490     mobj = re.search(PACKED_CODES_RE, code)
5491     obfuscated_code, base, count, symbols = mobj.groups()
5492     base = int(base)
5493     count = int(count)
5494     symbols = symbols.split('|')
5495     symbol_table = {}
5496
5497     while count:
5498         count -= 1
5499         base_n_count = encode_base_n(count, base)
5500         symbol_table[base_n_count] = symbols[count] or base_n_count
5501
5502     return re.sub(
5503         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5504         obfuscated_code)
5505
5506
5507 def caesar(s, alphabet, shift):
5508     if shift == 0:
5509         return s
5510     l = len(alphabet)
5511     return ''.join(
5512         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5513         for c in s)
5514
5515
5516 def rot47(s):
5517     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5518
5519
5520 def parse_m3u8_attributes(attrib):
5521     info = {}
5522     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5523         if val.startswith('"'):
5524             val = val[1:-1]
5525         info[key] = val
5526     return info
5527
5528
5529 def urshift(val, n):
5530     return val >> n if val >= 0 else (val + 0x100000000) >> n
5531
5532
5533 # Based on png2str() written by @gdkchan and improved by @yokrysty
5534 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5535 def decode_png(png_data):
5536     # Reference: https://www.w3.org/TR/PNG/
5537     header = png_data[8:]
5538
5539     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5540         raise IOError('Not a valid PNG file.')
5541
5542     int_map = {1: '>B', 2: '>H', 4: '>I'}
5543     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5544
5545     chunks = []
5546
5547     while header:
5548         length = unpack_integer(header[:4])
5549         header = header[4:]
5550
5551         chunk_type = header[:4]
5552         header = header[4:]
5553
5554         chunk_data = header[:length]
5555         header = header[length:]
5556
5557         header = header[4:]  # Skip CRC
5558
5559         chunks.append({
5560             'type': chunk_type,
5561             'length': length,
5562             'data': chunk_data
5563         })
5564
5565     ihdr = chunks[0]['data']
5566
5567     width = unpack_integer(ihdr[:4])
5568     height = unpack_integer(ihdr[4:8])
5569
5570     idat = b''
5571
5572     for chunk in chunks:
5573         if chunk['type'] == b'IDAT':
5574             idat += chunk['data']
5575
5576     if not idat:
5577         raise IOError('Unable to read PNG data.')
5578
5579     decompressed_data = bytearray(zlib.decompress(idat))
5580
5581     stride = width * 3
5582     pixels = []
5583
5584     def _get_pixel(idx):
5585         x = idx % stride
5586         y = idx // stride
5587         return pixels[y][x]
5588
5589     for y in range(height):
5590         basePos = y * (1 + stride)
5591         filter_type = decompressed_data[basePos]
5592
5593         current_row = []
5594
5595         pixels.append(current_row)
5596
5597         for x in range(stride):
5598             color = decompressed_data[1 + basePos + x]
5599             basex = y * stride + x
5600             left = 0
5601             up = 0
5602
5603             if x > 2:
5604                 left = _get_pixel(basex - 3)
5605             if y > 0:
5606                 up = _get_pixel(basex - stride)
5607
5608             if filter_type == 1:  # Sub
5609                 color = (color + left) & 0xff
5610             elif filter_type == 2:  # Up
5611                 color = (color + up) & 0xff
5612             elif filter_type == 3:  # Average
5613                 color = (color + ((left + up) >> 1)) & 0xff
5614             elif filter_type == 4:  # Paeth
5615                 a = left
5616                 b = up
5617                 c = 0
5618
5619                 if x > 2 and y > 0:
5620                     c = _get_pixel(basex - stride - 3)
5621
5622                 p = a + b - c
5623
5624                 pa = abs(p - a)
5625                 pb = abs(p - b)
5626                 pc = abs(p - c)
5627
5628                 if pa <= pb and pa <= pc:
5629                     color = (color + a) & 0xff
5630                 elif pb <= pc:
5631                     color = (color + b) & 0xff
5632                 else:
5633                     color = (color + c) & 0xff
5634
5635             current_row.append(color)
5636
5637     return width, height, pixels
5638
5639
5640 def write_xattr(path, key, value):
5641     # This mess below finds the best xattr tool for the job
5642     try:
5643         # try the pyxattr module...
5644         import xattr
5645
5646         if hasattr(xattr, 'set'):  # pyxattr
5647             # Unicode arguments are not supported in python-pyxattr until
5648             # version 0.5.0
5649             # See https://github.com/ytdl-org/youtube-dl/issues/5498
5650             pyxattr_required_version = '0.5.0'
5651             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5652                 # TODO: fallback to CLI tools
5653                 raise XAttrUnavailableError(
5654                     'python-pyxattr is detected but is too old. '
5655                     'youtube-dlc requires %s or above while your version is %s. '
5656                     'Falling back to other xattr implementations' % (
5657                         pyxattr_required_version, xattr.__version__))
5658
5659             setxattr = xattr.set
5660         else:  # xattr
5661             setxattr = xattr.setxattr
5662
5663         try:
5664             setxattr(path, key, value)
5665         except EnvironmentError as e:
5666             raise XAttrMetadataError(e.errno, e.strerror)
5667
5668     except ImportError:
5669         if compat_os_name == 'nt':
5670             # Write xattrs to NTFS Alternate Data Streams:
5671             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5672             assert ':' not in key
5673             assert os.path.exists(path)
5674
5675             ads_fn = path + ':' + key
5676             try:
5677                 with open(ads_fn, 'wb') as f:
5678                     f.write(value)
5679             except EnvironmentError as e:
5680                 raise XAttrMetadataError(e.errno, e.strerror)
5681         else:
5682             user_has_setfattr = check_executable('setfattr', ['--version'])
5683             user_has_xattr = check_executable('xattr', ['-h'])
5684
5685             if user_has_setfattr or user_has_xattr:
5686
5687                 value = value.decode('utf-8')
5688                 if user_has_setfattr:
5689                     executable = 'setfattr'
5690                     opts = ['-n', key, '-v', value]
5691                 elif user_has_xattr:
5692                     executable = 'xattr'
5693                     opts = ['-w', key, value]
5694
5695                 cmd = ([encodeFilename(executable, True)]
5696                        + [encodeArgument(o) for o in opts]
5697                        + [encodeFilename(path, True)])
5698
5699                 try:
5700                     p = subprocess.Popen(
5701                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5702                 except EnvironmentError as e:
5703                     raise XAttrMetadataError(e.errno, e.strerror)
5704                 stdout, stderr = p.communicate()
5705                 stderr = stderr.decode('utf-8', 'replace')
5706                 if p.returncode != 0:
5707                     raise XAttrMetadataError(p.returncode, stderr)
5708
5709             else:
5710                 # On Unix, and can't find pyxattr, setfattr, or xattr.
5711                 if sys.platform.startswith('linux'):
5712                     raise XAttrUnavailableError(
5713                         "Couldn't find a tool to set the xattrs. "
5714                         "Install either the python 'pyxattr' or 'xattr' "
5715                         "modules, or the GNU 'attr' package "
5716                         "(which contains the 'setfattr' tool).")
5717                 else:
5718                     raise XAttrUnavailableError(
5719                         "Couldn't find a tool to set the xattrs. "
5720                         "Install either the python 'xattr' module, "
5721                         "or the 'xattr' binary.")
5722
5723
5724 def random_birthday(year_field, month_field, day_field):
5725     start_date = datetime.date(1950, 1, 1)
5726     end_date = datetime.date(1995, 12, 31)
5727     offset = random.randint(0, (end_date - start_date).days)
5728     random_date = start_date + datetime.timedelta(offset)
5729     return {
5730         year_field: str(random_date.year),
5731         month_field: str(random_date.month),
5732         day_field: str(random_date.day),
5733     }
5734
5735 # Templates for internet shortcut files, which are plain text files.
5736 DOT_URL_LINK_TEMPLATE = '''
5737 [InternetShortcut]
5738 URL=%(url)s
5739 '''.lstrip()
5740
5741 DOT_WEBLOC_LINK_TEMPLATE = '''
5742 <?xml version="1.0" encoding="UTF-8"?>
5743 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5744 <plist version="1.0">
5745 <dict>
5746 \t<key>URL</key>
5747 \t<string>%(url)s</string>
5748 </dict>
5749 </plist>
5750 '''.lstrip()
5751
5752 DOT_DESKTOP_LINK_TEMPLATE = '''
5753 [Desktop Entry]
5754 Encoding=UTF-8
5755 Name=%(filename)s
5756 Type=Link
5757 URL=%(url)s
5758 Icon=text-html
5759 '''.lstrip()
5760
5761
5762 def iri_to_uri(iri):
5763     """
5764     Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5765
5766     The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5767     """
5768
5769     iri_parts = compat_urllib_parse_urlparse(iri)
5770
5771     if '[' in iri_parts.netloc:
5772         raise ValueError('IPv6 URIs are not, yet, supported.')
5773         # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5774
5775     # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5776
5777     net_location = ''
5778     if iri_parts.username:
5779         net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
5780         if iri_parts.password is not None:
5781             net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
5782         net_location += '@'
5783
5784     net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
5785     # The 'idna' encoding produces ASCII text.
5786     if iri_parts.port is not None and iri_parts.port != 80:
5787         net_location += ':' + str(iri_parts.port)
5788
5789     return compat_urllib_parse_urlunparse(
5790         (iri_parts.scheme,
5791             net_location,
5792
5793             compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5794
5795             # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5796             compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5797
5798             # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5799             compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5800
5801             compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5802
5803     # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5804
5805
5806 def to_high_limit_path(path):
5807     if sys.platform in ['win32', 'cygwin']:
5808         # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5809         return r'\\?\ '.rstrip() + os.path.abspath(path)
5810
5811     return path
5812
5813 def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
5814     val = obj.get(field, default)
5815     if func and val not in ignore:
5816         val = func(val)
5817     return template % val if val not in ignore else default