]> jfr.im git - yt-dlp.git/blob - youtube_dlc/utils.py
Documentation fixes
[yt-dlp.git] / youtube_dlc / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import xml.etree.ElementTree
37 import zlib
38
39 from .compat import (
40 compat_HTMLParseError,
41 compat_HTMLParser,
42 compat_basestring,
43 compat_chr,
44 compat_cookiejar,
45 compat_ctypes_WINFUNCTYPE,
46 compat_etree_fromstring,
47 compat_expanduser,
48 compat_html_entities,
49 compat_html_entities_html5,
50 compat_http_client,
51 compat_integer_types,
52 compat_kwargs,
53 compat_os_name,
54 compat_parse_qs,
55 compat_shlex_quote,
56 compat_str,
57 compat_struct_pack,
58 compat_struct_unpack,
59 compat_urllib_error,
60 compat_urllib_parse,
61 compat_urllib_parse_urlencode,
62 compat_urllib_parse_urlparse,
63 compat_urllib_parse_urlunparse,
64 compat_urllib_parse_quote,
65 compat_urllib_parse_quote_plus,
66 compat_urllib_parse_unquote_plus,
67 compat_urllib_request,
68 compat_urlparse,
69 compat_xpath,
70 )
71
72 from .socks import (
73 ProxyType,
74 sockssocket,
75 )
76
77
def register_socks_protocols():
    """Make urlparse treat SOCKS URL schemes as having a network location.

    In Python < 2.6.5, urlsplit() suffers from https://bugs.python.org/issue7904:
    URLs whose scheme is not listed in urlparse.uses_netloc are not handled
    correctly, so each SOCKS scheme is appended to that list (once).
    """
    registered = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme in registered:
            continue
        registered.append(scheme)
85
86
# Type of a compiled regular expression object. The re module exposes no
# portable public name for it across the Python versions this file supports,
# so it is derived from an actual compiled pattern.
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
89
90
def random_user_agent():
    """Return a randomized desktop Chrome User-Agent string.

    Chooses one entry at random from a fixed table of real Chrome release
    versions and interpolates it into a Windows 10 x64 UA template, so the
    User-Agent varies between runs instead of being a single static string.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Historical Chrome release version strings, roughly newest-first.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1672
1673
# Default HTTP headers sent with every request; the User-Agent is randomized
# once per process from the Chrome version list above
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings extractors can opt into when a site
# misbehaves with the default one
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel to distinguish "no default supplied" from "default is None"
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language, used for parsing localized dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized when guessing formats from URLs/filenames
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# strftime() patterns tried in order when parsing dates with no known locale
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Additional patterns for locales that write day before month (e.g. UK/EU)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional patterns for locales that write month before day (e.g. US)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of P.A.C.K.E.R.-style obfuscated JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the JSON-LD payload from a <script type="application/ld+json"> tag
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1781
1782
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Verify the reported codec actually works before trusting it
        'TEST'.encode(pref)
    except Exception:
        # Broken locale or unknown codec: fall back to a sane default
        return 'UTF-8'
    return pref
1796
1797
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible.

    Writes to a NamedTemporaryFile next to fn and renames it into place so
    readers never observe a partially-written file.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # (fixed: these lambdas previously ignored their argument `f` and
        # closed over `fn` instead, which only worked by coincidence)
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        # Place the temp file in the destination directory so the final
        # os.rename stays on one filesystem (and therefore atomic)
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files as 0o600; re-apply the
            # umask-derived default permissions before publishing the file
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file; re-raise the original error
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1856
1857
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (manual scan for Python < 2.7,
        whose ElementTree lacks attribute predicates) """
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1872
1873 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1874 # the namespace parameter
1875
1876
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' components of *path* into '{uri}tag' form.

    Components without a prefix are kept as-is; prefixes are resolved
    through *ns_map* (prefix -> namespace URI).
    """
    expanded = []
    for component in path.split('/'):
        pieces = component.split(':')
        if len(pieces) == 1:
            expanded.append(pieces[0])
        else:
            prefix, tag = pieces
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1887
1888
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching *xpath* (a string or list of strings).

    When nothing matches: return *default* if one was given, raise
    ExtractorError if *fatal*, otherwise return None.
    """
    # A single string is treated as a one-element list of candidates
    search_paths = [xpath] if isinstance(xpath, (str, compat_str)) else xpath
    for xp in search_paths:
        n = node.find(compat_xpath(xp))
        if n is not None:
            break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            raise ExtractorError(
                'Could not find XML element %s' % (xpath if name is None else name))
        else:
            return None
    return n
1910
1911
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element matching *xpath*.

    Mirrors xpath_element()'s default/fatal handling both when the element
    is missing and when it exists but has no text.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1925
1926
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute *key* of the element matching *xpath*.

    Falls back to *default*, raises ExtractorError when *fatal*, or
    returns None otherwise.
    """
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML attribute %s'
            % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1938
1939
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Returns None when no element with that id is found
    return get_element_by_attribute('id', id, html)
1943
1944
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    # next(iter(...), None) yields the first match or None for an empty list
    return next(iter(get_elements_by_class(class_name, html)), None)
1949
1950
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying the given attribute value, or None"""
    return next(iter(get_elements_by_attribute(attribute, value, html, escape_value)), None)
1954
1955
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class token anywhere inside a (possibly multi-valued) class
    # attribute; the pattern is already escaped, so disable further escaping
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1961
1962
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    if escape_value:
        value = re.escape(value)

    tag_re = re.compile(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value))

    results = []
    for match in tag_re.finditer(html):
        content = match.group('content')
        # Strip one surrounding quote pair, mirroring historical behaviour
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]
        results.append(unescapeHTML(content))
    return results
1986
1987
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Filled in by handle_starttag(); stays empty if no tag was seen
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Keep the attributes of the (last) start tag encountered
        self.attrs = dict(attrs)
1997
1998
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML;
    # return whatever attributes were gathered before the failure
    except compat_HTMLParseError:
        pass
    return parser.attrs
2023
2024
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # Newline vs <br />: real newlines are insignificant, <br>/<p> are
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    text = unescapeHTML(text)
    return text.strip()
2040
2041
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode so byte output is not mangled
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # '-' means stdout; prefer the raw byte buffer on Python 3
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error will not be fixed by renaming; re-raise as-is
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2072
2073
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2081
2082
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # Transliterate accented characters in restricted mode
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        # '?' and control characters are never allowed
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps: keep 12:34:56 readable as 12_34_56 before the
    # generic ':' replacement kicks in
    timestamped = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(c) for c in timestamped)
    if not is_id:
        # Collapse runs of underscores and trim leading/trailing ones
        result = re.sub(r'_{2,}', '_', result).strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[1:]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2122
2123
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    # Other platforms accept any character except NUL and the separator
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_parts = []
    for part in norm_path:
        if part in ('.', '..'):
            sanitized_parts.append(part)
        else:
            # Replace characters forbidden on Windows plus trailing dot/space
            sanitized_parts.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))
    if drive_or_unc:
        sanitized_parts.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_parts)
2140
2141
def sanitize_url(url):
    """Normalize scheme-less URLs and fix common scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://'),
    ):
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
2158
2159
def sanitized_Request(url, *args, **kwargs):
    """Like compat_urllib_request.Request, but runs the URL through sanitize_url() first."""
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2162
2163
def expand_path(s):
    """Expand shell variables and ~"""
    # expandvars handles $VAR / %VAR%; compat_expanduser handles '~'
    return os.path.expandvars(compat_expanduser(s))
2167
2168
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable, keeping first-seen order """
    # NB: linear membership test is deliberate -- elements need not be
    # hashable, so a set cannot be used here
    unique = []
    for item in iterable:
        if item not in unique:
            unique.append(item)
    return unique
2176
2177
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    # Drop the trailing ';'
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: &#123; (decimal) or &#x7B; (hex)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2207
2208
def unescapeHTML(s):
    """Replace HTML entities (&amp;, &#65;, ...) in s with their characters."""
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2216
2217
def process_communicate_or_kill(p, *args, **kwargs):
    """communicate() with subprocess p, killing and reaping it if interrupted.

    Ensures the child does not outlive e.g. a KeyboardInterrupt raised
    while waiting for it; the original exception is re-raised.
    """
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2225
2226
def get_subprocess_encoding():
    """Return the encoding used to exchange data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        chosen = preferredencoding()
    else:
        chosen = sys.getfilesystemencoding()
    return 'utf-8' if chosen is None else chosen
2237
2238
def encodeFilename(s, for_subprocess=False):
    """
    Encode a text filename to the platform's expected representation.

    @param s The name of the file
    @param for_subprocess True when the result is passed to a subprocess
                          rather than to a filesystem API
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2261
2262
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename to text on Python 2; pass anything else through."""
    # Python 3 filenames are already text; non-bytes values need no decoding
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2272
2273
def encodeArgument(s):
    """Encode a subprocess argument for the current platform."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2281
2282
def decodeArgument(b):
    """Decode a subprocess argument (inverse of encodeArgument)."""
    return decodeFilename(b, True)
2285
2286
def decodeOption(optval):
    """Decode a command-line option value to text if it arrived as bytes."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2295
2296
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    Fixed boundary comparisons: with strict '>' an exact hour rendered as
    '60:00' and an exact minute as '60'; '>=' rolls them over correctly.
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2304
2305
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honouring the 'nocheckcertificate' option.

    Tries the best SSL context the running Python supports, falling back
    to older APIs as needed.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2329
2330
def bug_reports_message():
    """Return the boilerplate appended to unexpected-error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dlc -U to update'
    else:
        update_cmd = 'see https://github.com/pukkandan/yt-dlp on how to update'
    return ''.join((
        '; please report this issue on https://github.com/pukkandan/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.',
    ))
2340
2341
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    All custom exceptions in this module derive from this class so callers
    can catch them uniformly.
    """
    pass
2345
2346
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        """

        # Network-level failures are always treated as expected (not a bug)
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report boilerplate appended
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2374
2375
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL (always 'expected')."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url
2381
2382
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2386
2387
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # NOTE(review): presumably a list of country codes where the video
        # IS available -- confirm against the extractors that raise this
        self.countries = countries
2399
2400
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2413
2414
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2422
2423
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the raw message accessible for callers that format it themselves
        self.msg = msg
2434
2435
class ExistingVideoReached(YoutubeDLError):
    """ --break-on-existing triggered: an already-downloaded video was reached """
    pass
2439
2440
class RejectedVideoReached(YoutubeDLError):
    """ --break-on-reject triggered: a filtered/rejected video was reached """
    pass
2444
2445
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2449
2450
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2458
2459
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2475
2476
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails.

    self.reason classifies the failure ('NO_SPACE', 'VALUE_TOO_LONG' or
    'NOT_SUPPORTED') so callers can react without parsing the message.
    """
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2491
2492
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no mechanism for writing extended attributes is available."""
    pass
2495
2496
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection, applying source_address and Py2 workarounds."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family matching the bind address (IPv4 vs IPv6)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # All candidate addresses failed: re-raise the last error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2560
2561
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, any Accept-Encoding header (matched
    case-insensitively) is removed as well so the request is sent
    without compression.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = dict(
        (key, value) for key, value in headers.items()
        if key.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2570
2571
2572 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2573 """Handler for HTTP requests and responses.
2574
2575 This class, when installed with an OpenerDirector, automatically adds
2576 the standard headers to every HTTP request and handles gzipped and
2577 deflated responses from web servers. If compression is to be avoided in
2578 a particular request, the original request in the program code only has
2579 to include the HTTP header "Youtubedl-no-compression", which will be
2580 removed before making the real request.
2581
2582 Part of this code was copied from:
2583
2584 http://techknack.net/python-urllib2-handlers/
2585
2586 Andrew Rowls, the author of that code, agreed to release it to the
2587 public domain.
2588 """
2589
    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL options dict, kept for the connection
        # factory (e.g. to read 'source_address')
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2593
    def http_open(self, req):
        """Open a plain HTTP connection, honouring the Ytdl-socks-proxy header."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Route through a SOCKS proxy; the marker header is internal
            # and must not be sent on the wire
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)
2605
    @staticmethod
    def deflate(data):
        # Some servers send raw deflate streams despite the header; try raw
        # first (negative wbits), then fall back to zlib-wrapped
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)
2612
    def http_request(self, req):
        """Normalize an outgoing request: escape the URL, add default headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Strip internal marker headers before the request goes on the wire
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req
2643
2644 def http_response(self, req, resp):
2645 old_resp = resp
2646 # gzip
2647 if resp.headers.get('Content-encoding', '') == 'gzip':
2648 content = resp.read()
2649 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2650 try:
2651 uncompressed = io.BytesIO(gz.read())
2652 except IOError as original_ioerror:
2653 # There may be junk add the end of the file
2654 # See http://stackoverflow.com/q/4928560/35070 for details
2655 for i in range(1, 1024):
2656 try:
2657 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2658 uncompressed = io.BytesIO(gz.read())
2659 except IOError:
2660 continue
2661 break
2662 else:
2663 raise original_ioerror
2664 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2665 resp.msg = old_resp.msg
2666 del resp.headers['Content-encoding']
2667 # deflate
2668 if resp.headers.get('Content-encoding', '') == 'deflate':
2669 gz = io.BytesIO(self.deflate(resp.read()))
2670 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2671 resp.msg = old_resp.msg
2672 del resp.headers['Content-encoding']
2673 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2674 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2675 if 300 <= resp.code < 400:
2676 location = resp.headers.get('Location')
2677 if location:
2678 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2679 if sys.version_info >= (3, 0):
2680 location = location.encode('iso-8859-1').decode('utf-8')
2681 else:
2682 location = location.decode('utf-8')
2683 location_escaped = escape_url(location)
2684 if location != location_escaped:
2685 del resp.headers['Location']
2686 if sys.version_info < (3, 0):
2687 location_escaped = location_escaped.encode('utf-8')
2688 resp.headers['Location'] = location_escaped
2689 return resp
2690
2691 https_request = http_request
2692 https_response = http_response
2693
2694
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* that tunnels through
    the SOCKS proxy described by the URL *socks_proxy*.

    Supported schemes: socks/socks4 (SOCKS4), socks4a (SOCKS4A),
    socks5 (SOCKS5). Raises ValueError for any other scheme instead of
    failing later with a confusing UnboundLocalError (the previous
    behavior when ``socks_type`` was left unassigned).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # Credentials arrive percent-encoded inside the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            # Establish the raw TCP connection through the SOCKS proxy
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # Wrap in TLS when the base class is an HTTPS connection
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2736
2737
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that supports a custom connection class and SOCKS
    proxies (requested via the internal Ytdl-socks-proxy header)."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class
        extra_kwargs = {}

        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        # Route through a SOCKS proxy when requested; the internal header
        # must not leak into the outgoing request
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **extra_kwargs)
2761
2762
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks HttpOnly cookies in Netscape-format cookie files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc. Do not edit.

'''
    # Named view over one tab-separated cookie file line, used for validation
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                # Honor the discard/expiry flags unless explicitly ignored
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly prefix so the stdlib parser accepts the line
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            # Copy the file into a buffer, skipping (with a warning) any
            # entries the stdlib parser would otherwise crash on
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2879
2880
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Thin wrapper over the stdlib cookie processor; exists so the
    Python 2 Set-Cookie escaping workaround below can be (re-)enabled."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE(review): the workaround below is deliberately disabled; kept
        # for reference in case the Python 2 issue resurfaces.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2903
2904
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    # On Python 3 the stdlib redirect handling is used unchanged; the
    # override below only exists for Python 2.
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2912
2913
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns ``(timezone, date_str)`` where ``timezone`` is a
    datetime.timedelta (zero for 'Z' or when no designator is found) and
    ``date_str`` has the designator stripped when one was matched.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # 'Z' designator: UTC, zero offset
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
2930
2931
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds are not representable by %S, drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        dt = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
2949
2950
def date_formats(day_first=True):
    """Return the strptime patterns to try, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2953
2954
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    result = None
    # Commas, AM/PM markers and timezone designators confuse strptime
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # NB: deliberately tries every format; the last one that parses wins
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if result is None:
        # Fall back to the RFC 2822 parser
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                result = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if result is not None:
        return compat_str(result)
2981
2982
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker means 12 hours must be added after parsing
    pm_offset = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    mobj = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if mobj:
        date_str = date_str[:-len(mobj.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    mobj = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if mobj:
        date_str = mobj.group(1)

    # First successful format wins (unlike unified_strdate)
    for expression in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(date_str, expression)
        except ValueError:
            continue
        parsed = parsed - timezone + datetime.timedelta(hours=pm_offset)
        return calendar.timegm(parsed.timetuple())

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_offset * 3600
3014
3015
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3027
3028
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Derive the subtitle file name: <base>.<lang>.<format>."""
    lang_and_format = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, lang_and_format, expected_real_ext)
3031
3032
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    mobj = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if mobj is None:
        # Plain YYYYMMDD date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(mobj.group('time'))
    if mobj.group('sign') == '-':
        amount = -amount
    unit = mobj.group('unit')
    # A bad approximation? Months/years become 30/365 days
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3060
3061
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(mobj.groups()) if mobj is not None else date_str
3070
3071
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3101
3102
def platform_name():
    """ Returns the platform name as a compat_str """
    result = platform.platform()
    # Python 2 may hand back bytes; normalize to text
    if isinstance(result, bytes):
        result = result.decode(preferredencoding())

    assert isinstance(result, compat_str)
    return result
3111
3112
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C file descriptors (stdout=1, stderr=2) to Win32 standard handle
    # IDs (STD_OUTPUT_HANDLE=-11, STD_ERROR_HANDLE=-12)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is a real console only if it is a local character device
        # and GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 BMP characters; non-BMP characters are
    # written one at a time as a UTF-16 surrogate pair (2 code units)
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3186
3187
def write_string(s, out=None, encoding=None):
    """Write text *s* to *out* (default: stderr), coping with byte-mode
    streams, Python 2 quirks and Windows consoles, then flush."""
    out = sys.stderr if out is None else out
    assert type(s) == compat_str

    # On Windows, prefer the native console API so non-ASCII text survives
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3208
3209
def bytes_to_intlist(bs):
    """Turn a bytes/str byte sequence into a list of integer values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    return [ord(ch) for ch in bs]  # Python 2: indexing yields 1-char strs
3217
3218
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values into a bytes object."""
    return compat_struct_pack('%dB' % len(xs), *xs) if xs else b''
3223
3224
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure used by Lock/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the maximal byte range the API accepts (low/high DWORD halves)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED alive on the file object for the unlock call
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3298
3299
class locked_file(object):
    """Context manager wrapping an open file that holds an OS-level lock
    (shared for 'r', exclusive for 'w'/'a') for the duration of the block."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Reads take a shared lock; writes/appends take an exclusive one
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Don't leak the file handle when locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3329
3330
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to UTF-8 when unknown."""
    enc = sys.getfilesystemencoding()
    return 'utf-8' if enc is None else enc
3334
3335
def shell_quote(args):
    """Quote a sequence of arguments for safe use on a shell command line."""
    fs_encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_as_text(arg)) for arg in args)
3345
3346
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge any data already smuggled into the URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    smuggled = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, smuggled)
3355
3356
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): split smuggled data back out of the URL."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3364
3365
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00MiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    return '%.2f%s' % (float(bytes) / float(1024 ** exponent), suffix)
3378
3379
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' from *s* using *unit_table* (unit -> multiplier);
    return the integer product, or None when nothing matches."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not mobj:
        return None
    # Accept ',' as decimal separator too
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3389
3390
def parse_filesize(s):
    """Parse a human-readable file size like '5.5 MiB' or '300kB' into a
    byte count (int), or None when *s* is None or unparsable."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    # NOTE: lowercase-prefix 'xB' forms (kB, mB, ...) are deliberately mapped
    # to binary multiples here, matching long-standing youtube-dl behavior.
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3460
3461
def parse_count(s):
    """Parse a human count string like '1.5M' into an integer, or None."""
    if s is None:
        return None

    s = s.strip()

    # Plain numbers (possibly with separators) need no unit table
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    return lookup_unit_table({
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }, s)
3481
3482
def parse_resolution(s):
    """Extract width/height from a description like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if mobj:
        return {'width': int(mobj.group('w')), 'height': int(mobj.group('h'))}

    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
3503
3504
def parse_bitrate(s):
    """Extract an integer kbps value from a string like '128 kbps', or None."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
3511
3512
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3522
3523
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbrevs = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev not in abbrevs:
        return None
    return abbrevs.index(abbrev) + 1
3532
3533
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Leave existing entities (&amp;, &lt;, numeric references, ...) untouched
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3540
3541
def setproctitle(title):
    """Set the process name shown by tools like ps (Linux/glibc only)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        # Not a glibc system; silently skip
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3566
3567
def remove_start(s, start):
    """Strip *start* from the beginning of *s* if present (None-safe)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3570
3571
def remove_end(s, end):
    """Strip *end* from the end of *s* if present (None-safe)."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3574
3575
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3583
3584
def get_domain(url):
    """Extract the bare domain (scheme and leading 'www.' stripped) or None."""
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return mobj.group('domain') if mobj else None
3588
3589
def url_basename(url):
    """Return the last path component of *url* (e.g. .../a/b.mp4 -> 'b.mp4')."""
    return compat_urlparse.urlparse(url).path.strip('/').split('/')[-1]
3593
3594
def base_url(url):
    """Return *url* truncated after the last '/' before any query/fragment."""
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3597
3598
def urljoin(base, path):
    """Resolve *path* against *base*, returning None when either is unusable."""
    def _ensure_text(value):
        return value.decode('utf-8') if isinstance(value, bytes) else value

    path = _ensure_text(path)
    if not path or not isinstance(path, compat_str):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path  # already absolute (or protocol-relative)
    base = _ensure_text(base)
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3612
3613
class HEADRequest(compat_urllib_request.Request):
    # Request subclass that forces the HTTP method to HEAD
    def get_method(self):
        return 'HEAD'
3617
3618
class PUTRequest(compat_urllib_request.Request):
    # Request subclass that forces the HTTP method to PUT
    def get_method(self):
        return 'PUT'
3622
3623
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int (optionally reading attribute *get_attr* first,
    scaled by invscale/scale); return *default* when conversion fails."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3636
3637
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* when it is None."""
    if v is None:
        return default
    return compat_str(v)
3640
3641
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    # Ints pass straight through; strings get thousands separators removed.
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3649
3650
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a scaled float, or *default* on None/unconvertible input."""
    if v is None:
        return default
    try:
        result = float(v)
    except (ValueError, TypeError):
        return default
    return result * invscale / scale
3658
3659
def bool_or_none(v, default=None):
    """Return *v* only when it is a genuine bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
3662
3663
def strip_or_none(v, default=None):
    """Strip whitespace from a string, or return *default* for non-strings."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3666
3667
def url_or_none(url):
    """Return the stripped *url* when it looks like a supported URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3673
3674
def parse_duration(s):
    """Parse a duration string and return the total number of seconds,
    or None when the string cannot be interpreted.

    Accepts clock notation ('1:23:45', '01:02:03.5'), free-form unit
    notation ('3h 4min 5s'), ISO 8601 style ('PT1H2M3S') and fractional
    forms ('2.5 hours', '90 min').
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # First try clock-style [[[dd:]hh:]mm:]ss[.ms] notation.
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Then ISO 8601 style durations and '3h 4min 5s' free-form notation.
        # Years/months/weeks are matched but intentionally not captured.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Last resort: fractional '2.5 hours' / '90 minutes' forms.
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whatever components matched; 'ms' still carries its leading dot,
    # so float(ms) yields the fractional part directly.
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3731
3732
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    When *expected_real_ext* is given but does not match the actual
    extension, *ext* is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3739
3740
def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the extension of *filename* for *ext*.

    When *expected_real_ext* is given but does not match the actual
    extension, *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3746
3747
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # Use a None sentinel instead of a mutable [] default argument.
    if args is None:
        args = []
    try:
        process_communicate_or_kill(subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
    except OSError:
        # Binary not found (or not executable) - report absence.
        return False
    return exe
3757
3758
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3776
3777
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from program *output* via *version_re*.

    Falls back to a generic 'version <x>' pattern; returns *unrecognized*
    when nothing matches.
    """
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3787
3788
class PagedList(object):
    """Base class for lazily paged result lists.

    Subclasses must implement getslice(start, end).
    """
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3793
3794
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via *pagefunc*.

    pagefunc: callable mapping a 0-based page number to an iterable of items.
    pagesize: number of items per full page.
    use_cache: keep fetched pages so repeated slices do not re-query.
    """
    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Collect items from *start* up to (but excluding) *end*."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of the first wanted item within this page.
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted item within this page.
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3845
3846
class InAdvancePagedList(PagedList):
    """PagedList variant for which the total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Collect items from *start* up to (but excluding) *end*."""
        results = []
        first_page = start // self._pagesize
        last_page = self._pagecount if end is None else end // self._pagesize + 1
        to_skip = start - first_page * self._pagesize
        remaining = None if end is None else end - start
        for pagenum in range(first_page, last_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                # Drop leading items before *start* on the first page only.
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    results.extend(page[:remaining])
                    break
                remaining -= len(page)
            results.extend(page)
        return results
3874
3875
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escapes embedded in *s*."""
    decoder = codecs.getdecoder('unicode_escape')

    def _decode(m):
        return decoder(m.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _decode, s)
3882
3883
def lowercase_escape(s):
    """Decode literal '\\uXXXX' escapes embedded in *s*."""
    decoder = codecs.getdecoder('unicode_escape')

    def _decode(m):
        return decoder(m.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _decode, s)
3890
3891
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2 quote() needs bytes; Python 3 handles str directly.
    if isinstance(s, compat_str) and sys.version_info < (3, 0):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3897
3898
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    # Host goes through IDNA; every other component is percent-escaped.
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment),
    ).geturl()
3909
3910
def read_batch_urls(batch_fd):
    """Read URLs from an open batch file, skipping BOMs, blanks and comments."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3928
3929
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3932
3933
def update_url_query(url, query):
    """Return *url* with the *query* parameters merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(params, True)))
3942
3943
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its url, data, headers and query."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    # Preserve the HTTP method of the original request.
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3962
3963
def _multipart_encode_impl(data, boundary):
    """Serialize *data* as multipart/form-data with the given *boundary*.

    Raises ValueError when the boundary occurs inside a field.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += part

    out += b'--' + boundary_bytes + b'--\r\n'
    return out, content_type
3984
3985
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # A random boundary that collided with the data is retried;
            # a caller-supplied one is a hard error.
            if has_specified_boundary:
                raise
            boundary = None
4014
4015
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable key of *key_or_keys* in *d*.

    None values are always skipped; falsy ones too unless
    skip_false_values is False.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4024
4025
def try_get(src, getter, expected_type=None):
    """Apply *getter* (or each of a list of getters) to *src*.

    Returns the first result that neither raises a common lookup error
    nor fails the optional *expected_type* check; otherwise None.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fetch in getters:
        try:
            value = fetch(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4037
4038
def merge_dicts(*dicts):
    """Merge dicts left to right; earlier non-None values win, except that a
    later non-empty string replaces an earlier empty one."""
    merged = {}
    for current in dicts:
        for k, v in current.items():
            if v is None:
                continue
            if k not in merged:
                merged[k] = v
            elif (isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k]):
                merged[k] = v
    return merged
4051
4052
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding bytes with *encoding*."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4055
4056
# US MPAA movie ratings mapped to minimum viewer ages.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines ratings mapped to minimum viewer ages.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4074
4075
def parse_age_limit(s):
    """Parse an age limit from an int, a '<NN>+' string, a US movie rating or
    a TV Parental Guidelines rating; return an int in 0..21 or None."""
    # isinstance() instead of the type(s) == int anti-pattern; bool is a
    # subclass of int, so exclude it explicitly to keep the old behavior.
    if isinstance(s, int) and not isinstance(s, bool):
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4090
4091
def strip_jsonp(code):
    """Strip a JSONP wrapper like `cb({...});`, returning the bare payload."""
    # Matches an optional 'window.'-prefixed callback name (possibly guarded
    # by 'name && name(...)'), capturing everything inside the call.
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4100
4101
def js_to_json(code):
    """Convert a JavaScript object/value literal into valid JSON text."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals (optionally followed by ':' when used
    # as object keys), paired with their numeric base.
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token into its JSON equivalent.
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!'-prefixed expressions and trailing commas vanish.
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to their JSON forms.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            # Convert hex/octal integers to decimal (quoted when a key).
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        # Bare identifiers become quoted strings.
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4142
4143
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def rank(qid):
        # Position in the list is the quality rank; unknown ids rank -1.
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return rank
4152
4153
# Default output filename template: "<title> [<id>].<ext>".
DEFAULT_OUTTMPL = '%(title)s [%(id)s].%(ext)s'
4155
4156
def limit_length(s, length):
    """Truncate *s* to at most *length* characters, ending with '...' if cut."""
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
4165
4166
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4169
4170
def is_outdated_version(version, limit, assume_new=True):
    """Compare dotted version strings; unparseable input falls back to
    the *assume_new* assumption."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4178
4179
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    # Self-update is deliberately disabled in youtube-dlc. The zipimporter
    # based detection inherited from upstream was unreachable dead code
    # after this return and has been removed.
    return False
4187
4188
def args_to_str(args):
    """Render a subprocess argument list as a single shell-quoted string."""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4192
4193
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    msg = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        msg = msg.decode(preferredencoding())
    return msg
4201
4202
def mimetype2ext(mt):
    """Map a MIME type to a conventional file extension (None for None)."""
    if mt is None:
        return None

    # Full-type exceptions that cannot be derived from the subtype alone.
    full_type_map = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    if mt in full_type_map:
        return full_type_map[mt]

    # Otherwise derive from the (lowercased) subtype, dropping parameters.
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()
    subtype_map = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return subtype_map.get(subtype, subtype)
4240
4241
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into video/audio codec fields.

    Returns a dict with 'vcodec' and 'acodec' keys ('none' when absent),
    or an empty dict when nothing could be determined.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    # A comprehension instead of the previous map(lambda str: ...), whose
    # parameter shadowed the builtin `str`.
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: a two-entry list is assumed to be video,audio.
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
4271
4272
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers.

    Prefers the Content-Disposition filename, falling back to Content-Type.
    """
    headers = url_handle.headers
    cd = headers.get('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4285
4286
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI for *data* with the given MIME type."""
    b64 = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, b64)
4289
4290
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone.
        return False
    return age_limit < content_limit
4299
4300
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4319
4320
def determine_protocol(info_dict):
    """Work out the download protocol for *info_dict*."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Streaming protocols identified by the URL prefix.
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    # Manifest-based protocols identified by the file extension.
    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4341
4342
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """
    def column_widths(table):
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def keep_nonempty(row, widths):
        # Keep only cells whose column has a non-zero maximum width.
        return [cell for width, cell in zip(widths, row) if width]

    if hideEmpty:
        widths = column_widths(data)
        header_row = keep_nonempty(header_row, widths)
        data = [keep_nonempty(row, widths) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        table = [header_row] + [['-' * w for w in widths]] + data
    fmt = ' '.join('%-' + compat_str(w + extraGap) + 's' for w in widths[:-1]) + ' %s'
    return '\n'.join(fmt % tuple(row) for row in table)
4363
4364
def _match_one(filter_part, dct):
    """Evaluate a single filter clause (e.g. 'duration > 60' or '!is_live')
    against *dct*; raises ValueError for an unparseable clause."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key <op>[?] value, where value is a number (with optional SI/size
    # suffix), a quoted string, or a bare word.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the same kind inside the quoted value.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Allow human-readable sizes such as '500k' or '1.2MiB'.
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # The '?' suffix makes the clause pass when the field is missing.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary presence/absence tests: 'key' and '!key'.
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4433
4434
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # Every '&'-separated clause must match.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4440
4441
def match_filter_func(filter_str):
    """Build a --match-filter callback from *filter_str*.

    The callback returns None when the video passes, or a skip message.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4450
4451
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (None if unparseable)."""
    if not time_expr:
        return None

    m = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if m:
        return float(m.group('time_offset'))

    m = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if m:
        hours, minutes, seconds = m.groups()
        # A frame-style 'ss:ff' tail is treated as a fractional second.
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4463
4464
4465 def srt_subtitles_timecode(seconds):
4466 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4467
4468
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Older TTML/TTAF namespaces are rewritten to the current ones so a
    # single set of XPath expressions works for all inputs.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that are translated into SRT markup.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # Streaming SAX-style target that renders one <p> element to text
        # with <b>/<i>/<u>/<font> markup derived from TTML styles.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an ancestor.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the markup opened by the matching start() call.
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Render one paragraph element through TTMLPElementParser.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style definitions, re-running until parent styles referenced
    # before their definition have been filled in.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style attached to <body> or <div> becomes the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4631
4632
def cli_option(params, command_option, param):
    """Return [command_option, str(value)] for *param* in *params*, or []
    when the parameter is unset."""
    param = params.get(param)
    # The previous code only stringified truthy values, so a falsy non-None
    # value (e.g. 0 or '') leaked through unconverted; convert whenever the
    # parameter is present at all.
    if param is None:
        return []
    return [command_option, compat_str(param)]
4638
4639
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean *param* as a CLI option, joined by *separator* if given."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4648
4649
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit the bare *command_option* when the parameter equals *expected_value*."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4653
4654
def cli_configuration_args(params, param, default=None):
    """Fetch an extra-arguments list from *params*; *default* (an empty list
    when not given) is returned if the parameter is unset."""
    # `default` used to be a mutable `[]` default argument, which all callers
    # silently shared; use a None sentinel instead.
    if default is None:
        default = []
    ex_args = params.get(param)
    if ex_args is None:
        return default
    assert isinstance(ex_args, list)
    return ex_args
4661
4662
class ISO639Utils(object):
    """Conversion helpers between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are used, so region-qualified codes
        # such as 'en-US' resolve like 'en'.  Returns None when unknown.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear reverse lookup over the map above; falls through (i.e.
        # implicitly returns None) when no two-letter equivalent exists.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
4866
4867
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive on input; returns None for unknown codes.
        return cls._country_map.get(code.upper())
5126
5127
class GeoUtils(object):
    """Helpers for picking plausible IPv4 addresses per country (used for
    geo-restriction bypass)."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a str) from the given block.

        `code_or_block` is either a two-letter country code (looked up in
        _country_ip_map; returns None when unknown) or a CIDR block such
        as '1.2.3.0/24'.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Treat the given address as the lowest address of the block; the
        # highest one is obtained by setting all host bits.
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5386
5387
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets each request override the proxy via
    the internal 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            # Default arguments bind `type` (and the bound method) at
            # definition time, working around Python's late-binding
            # closures; '__noproxy__' is the "no proxy configured" sentinel.
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy (set by the downloader) takes precedence over
        # the handler-wide configuration; the internal header is stripped
        # before the request goes out.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers wrap the socket with SOCKS
            # themselves, so nothing more to do here.
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5411
5412
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    The integer is rendered big-endian with no leading zero bytes (zero
    itself becomes a single NUL byte).  If optional blocksize is given and
    greater than zero, the front of the byte string is padded with binary
    zeros so that the length is a multiple of blocksize.
    """
    n = int(n)
    digits = bytearray()
    while n > 0:
        digits.append(n & 0xff)
        n >>= 8
    if not digits:
        # Only happens when n == 0 (or negative)
        digits.append(0)
    digits.reverse()
    s = bytes(digits)
    # Left-pad to a multiple of blocksize if requested
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5445
5446
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    # bytearray iteration yields ints on both Python 2 and 3
    for byte in bytearray(s):
        acc = (acc << 8) | byte
    return acc
5462
5463
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Reversing the buffer before hexlify() interprets `data` as a
    # little-endian integer, matching OHDave's JavaScript implementation
    plaintext = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(plaintext, exponent, modulus)
5479
5480
5481 def pkcs1pad(data, length):
5482 """
5483 Padding input data with PKCS#1 scheme
5484
5485 @param {int[]} data input data
5486 @param {int} length target length
5487 @returns {int[]} padded data
5488 """
5489 if len(data) > length - 11:
5490 raise ValueError('Input data too long for PKCS#1 padding')
5491
5492 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5493 return [0, 2] + pseudo_random + [0] + data
5494
5495
def encode_base_n(num, n, table=None):
    """Encode the integer `num` in base `n`, using `table` as the digit
    alphabet (defaults to the first `n` characters of 0-9a-zA-Z)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5512
5513
def decode_packed_codes(code):
    """Unpack JavaScript obfuscated with the common 'p.a.c.k.e.r.' scheme
    (matched by PACKED_CODES_RE): every base-N token in the packed source
    is substituted with its original keyword."""
    mobj = re.search(PACKED_CODES_RE, code)
    packed_source, radix, word_count, keyword_blob = mobj.groups()
    radix = int(radix)
    keywords = keyword_blob.split('|')

    # Build the token -> keyword table; an empty keyword means the token
    # stands for itself.
    lookup = {}
    for index in range(int(word_count)):
        token = encode_base_n(index, radix)
        lookup[token] = keywords[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda match: lookup[match.group(0)],
        packed_source)
5530
5531
def caesar(s, alphabet, shift):
    """Shift every character of `s` that occurs in `alphabet` by `shift`
    positions (wrapping around); all other characters pass through."""
    if shift == 0:
        return s
    size = len(alphabet)
    rotated = []
    for ch in s:
        if ch in alphabet:
            rotated.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            rotated.append(ch)
    return ''.join(rotated)
5539
5540
def rot47(s):
    """Apply the ROT47 cipher: rotate the 94 printable ASCII characters
    ('!' through '~') by 47 positions.  The transform is its own inverse."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5543
5544
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted,value",...')
    into a dict, stripping the surrounding double quotes from values."""
    return dict(
        (key, val[1:-1] if val.startswith('"') else val)
        for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib))
5552
5553
def urshift(val, n):
    """Unsigned 32-bit right shift, i.e. JavaScript's `>>>` operator:
    negative values are first reinterpreted as their 32-bit two's
    complement before shifting."""
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
5556
5557
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into (width, height, pixels).

    `pixels` contains one list per scanline; each scanline is a flat list
    of 8-bit channel values, three per pixel (the stride computation below
    assumes 24-bit RGB, non-interlaced data).

    Raises IOError for input that is not a PNG or contains no image data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]  # everything after the 8-byte PNG signature

    # The first chunk must be IHDR, directly after the signature
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integer of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is laid out as: 4-byte length, 4-byte type,
    # <length> bytes of data, 4-byte CRC (not verified here)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (guaranteed by the signature check above)
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split across several IDAT chunks; concatenated
    # they form a single zlib stream
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # 3 bytes (R, G, B) per pixel
    pixels = []

    # Channel value at linear index `idx` of the already-reconstructed output
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo per-scanline filtering; every scanline is prefixed with one
    # filter-type byte (types per the PNG spec, section 9)
    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # `left` is the same channel of the pixel to the left (3 bytes
            # back, hence x > 2); `up` the same channel one scanline up
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0  # upper-left neighbor

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                # Paeth predictor: pick the neighbor closest to a + b - c
                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5663
5664
def write_xattr(path, key, value):
    """Set the extended filesystem attribute `key` to `value` (bytes) on
    `path`.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate
    Data Streams on Windows, then the `setfattr`/`xattr` CLI tools.
    Raises XAttrMetadataError when setting fails and XAttrUnavailableError
    when no usable implementation is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Both 'pyxattr' and 'xattr' install a module named `xattr`; they
        # are distinguished by their API (pyxattr has `set`)
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            # ':' is the ADS separator, so it must not appear in the key
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Fall back to the CLI tools (GNU attr's setfattr, or xattr)
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # CLI tools take the value as a (unicode) string argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5747
5748
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict mapping the given field names to string values
    (useful for filling age-gate forms)."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(days=random.randint(0, span_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
5759
5760
# Templates for internet shortcut files, which are plain text files.
# Each template has a leading newline stripped so the file starts with its
# first meaningful line; '%(url)s' / '%(filename)s' are filled in by callers.

# Windows-style '.url' shortcut (InternetShortcut INI format)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS '.webloc' shortcut (Apple XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org '.desktop' link ([Desktop Entry] format)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5786
5787
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    # Rebuild the authority component piece by piece (userinfo@host:port)
    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): port 80 is dropped unconditionally, i.e. also for
    # non-http schemes where 80 is not the default — confirm intended.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5830
5831
def to_high_limit_path(path):
    """On Windows/Cygwin, return the absolute path with the Win32
    extended-length prefix to lift the MAX_PATH limitation (individual
    path segments may still be length-limited); elsewhere return the path
    unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # A raw string literal cannot end with a backslash, hence the
    # trailing-space-plus-rstrip trick to spell the prefix
    return r'\\?\ '.rstrip() + os.path.abspath(path)
5838
5839
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Look up `field` in `obj` and render it through `template`.

    Values contained in `ignore` yield `default` instead; `func`, when
    given, transforms the value first (and its result is subject to the
    same `ignore` check).
    """
    value = obj.get(field, default)
    if value in ignore:
        return default
    if func:
        value = func(value)
        # A transformed value may itself be ignorable
        if value in ignore:
            return default
    return template % value
5845
5846
def clean_podcast_url(url):
    """Strip well-known podcast tracking/analytics redirect prefixes
    (Chartable, Blubrry, Podtrac, Acast, Podcorn, Podsights) out of `url`."""
    tracking_prefixes = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefixes, '', url)