yt_dlp/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import imp
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import operator
  26 import os
  27 import platform
  28 import random
  29 import re
  30 import socket
  31 import ssl
  32 import subprocess
  33 import sys
  34 import tempfile
  35 import time
  36 import traceback
  37 import xml.etree.ElementTree
  38 import zlib
  39
  40 from .compat import (
  41     compat_HTMLParseError,
  42     compat_HTMLParser,
  43     compat_basestring,
  44     compat_chr,
  45     compat_cookiejar,
  46     compat_ctypes_WINFUNCTYPE,
  47     compat_etree_fromstring,
  48     compat_expanduser,
  49     compat_html_entities,
  50     compat_html_entities_html5,
  51     compat_http_client,
  52     compat_integer_types,
  53     compat_numeric_types,
  54     compat_kwargs,
  55     compat_os_name,
  56     compat_parse_qs,
  57     compat_shlex_quote,
  58     compat_str,
  59     compat_struct_pack,
  60     compat_struct_unpack,
  61     compat_urllib_error,
  62     compat_urllib_parse,
  63     compat_urllib_parse_urlencode,
  64     compat_urllib_parse_urlparse,
  65     compat_urllib_parse_urlunparse,
  66     compat_urllib_parse_quote,
  67     compat_urllib_parse_quote_plus,
  68     compat_urllib_parse_unquote_plus,
  69     compat_urllib_request,
  70     compat_urlparse,
  71     compat_xpath,
  72 )
  73
  74 from .socks import (
  75     ProxyType,
  76     sockssocket,
  77 )
  78
  79
  80 def register_socks_protocols():
  81     # "Register" SOCKS protocols
  82     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  83     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  84     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  85         if scheme not in compat_urlparse.uses_netloc:
  86             compat_urlparse.uses_netloc.append(scheme)
  87
  88
  89 # This is not clearly defined otherwise
  90 compiled_regex_type = type(re.compile(''))
  91
  92
  93 def random_user_agent():
  94     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  95     _CHROME_VERSIONS = (
  96         '74.0.3729.129',
  97         '76.0.3780.3',
  98         '76.0.3780.2',
  99         '74.0.3729.128',
 100         '76.0.3780.1',
 101         '76.0.3780.0',
 102         '75.0.3770.15',
 103         '74.0.3729.127',
 104         '74.0.3729.126',
 105         '76.0.3779.1',
 106         '76.0.3779.0',
 107         '75.0.3770.14',
 108         '74.0.3729.125',
 109         '76.0.3778.1',
 110         '76.0.3778.0',
 111         '75.0.3770.13',
 112         '74.0.3729.124',
 113         '74.0.3729.123',
 114         '73.0.3683.121',
 115         '76.0.3777.1',
 116         '76.0.3777.0',
 117         '75.0.3770.12',
 118         '74.0.3729.122',
 119         '76.0.3776.4',
 120         '75.0.3770.11',
 121         '74.0.3729.121',
 122         '76.0.3776.3',
 123         '76.0.3776.2',
 124         '73.0.3683.120',
 125         '74.0.3729.120',
 126         '74.0.3729.119',
 127         '74.0.3729.118',
 128         '76.0.3776.1',
 129         '76.0.3776.0',
 130         '76.0.3775.5',
 131         '75.0.3770.10',
 132         '74.0.3729.117',
 133         '76.0.3775.4',
 134         '76.0.3775.3',
 135         '74.0.3729.116',
 136         '75.0.3770.9',
 137         '76.0.3775.2',
 138         '76.0.3775.1',
 139         '76.0.3775.0',
 140         '75.0.3770.8',
 141         '74.0.3729.115',
 142         '74.0.3729.114',
 143         '76.0.3774.1',
 144         '76.0.3774.0',
 145         '75.0.3770.7',
 146         '74.0.3729.113',
 147         '74.0.3729.112',
 148         '74.0.3729.111',
 149         '76.0.3773.1',
 150         '76.0.3773.0',
 151         '75.0.3770.6',
 152         '74.0.3729.110',
 153         '74.0.3729.109',
 154         '76.0.3772.1',
 155         '76.0.3772.0',
 156         '75.0.3770.5',
 157         '74.0.3729.108',
 158         '74.0.3729.107',
 159         '76.0.3771.1',
 160         '76.0.3771.0',
 161         '75.0.3770.4',
 162         '74.0.3729.106',
 163         '74.0.3729.105',
 164         '75.0.3770.3',
 165         '74.0.3729.104',
 166         '74.0.3729.103',
 167         '74.0.3729.102',
 168         '75.0.3770.2',
 169         '74.0.3729.101',
 170         '75.0.3770.1',
 171         '75.0.3770.0',
 172         '74.0.3729.100',
 173         '75.0.3769.5',
 174         '75.0.3769.4',
 175         '74.0.3729.99',
 176         '75.0.3769.3',
 177         '75.0.3769.2',
 178         '75.0.3768.6',
 179         '74.0.3729.98',
 180         '75.0.3769.1',
 181         '75.0.3769.0',
 182         '74.0.3729.97',
 183         '73.0.3683.119',
 184         '73.0.3683.118',
 185         '74.0.3729.96',
 186         '75.0.3768.5',
 187         '75.0.3768.4',
 188         '75.0.3768.3',
 189         '75.0.3768.2',
 190         '74.0.3729.95',
 191         '74.0.3729.94',
 192         '75.0.3768.1',
 193         '75.0.3768.0',
 194         '74.0.3729.93',
 195         '74.0.3729.92',
 196         '73.0.3683.117',
 197         '74.0.3729.91',
 198         '75.0.3766.3',
 199         '74.0.3729.90',
 200         '75.0.3767.2',
 201         '75.0.3767.1',
 202         '75.0.3767.0',
 203         '74.0.3729.89',
 204         '73.0.3683.116',
 205         '75.0.3766.2',
 206         '74.0.3729.88',
 207         '75.0.3766.1',
 208         '75.0.3766.0',
 209         '74.0.3729.87',
 210         '73.0.3683.115',
 211         '74.0.3729.86',
 212         '75.0.3765.1',
 213         '75.0.3765.0',
 214         '74.0.3729.85',
 215         '73.0.3683.114',
 216         '74.0.3729.84',
 217         '75.0.3764.1',
 218         '75.0.3764.0',
 219         '74.0.3729.83',
 220         '73.0.3683.113',
 221         '75.0.3763.2',
 222         '75.0.3761.4',
 223         '74.0.3729.82',
 224         '75.0.3763.1',
 225         '75.0.3763.0',
 226         '74.0.3729.81',
 227         '73.0.3683.112',
 228         '75.0.3762.1',
 229         '75.0.3762.0',
 230         '74.0.3729.80',
 231         '75.0.3761.3',
 232         '74.0.3729.79',
 233         '73.0.3683.111',
 234         '75.0.3761.2',
 235         '74.0.3729.78',
 236         '74.0.3729.77',
 237         '75.0.3761.1',
 238         '75.0.3761.0',
 239         '73.0.3683.110',
 240         '74.0.3729.76',
 241         '74.0.3729.75',
 242         '75.0.3760.0',
 243         '74.0.3729.74',
 244         '75.0.3759.8',
 245         '75.0.3759.7',
 246         '75.0.3759.6',
 247         '74.0.3729.73',
 248         '75.0.3759.5',
 249         '74.0.3729.72',
 250         '73.0.3683.109',
 251         '75.0.3759.4',
 252         '75.0.3759.3',
 253         '74.0.3729.71',
 254         '75.0.3759.2',
 255         '74.0.3729.70',
 256         '73.0.3683.108',
 257         '74.0.3729.69',
 258         '75.0.3759.1',
 259         '75.0.3759.0',
 260         '74.0.3729.68',
 261         '73.0.3683.107',
 262         '74.0.3729.67',
 263         '75.0.3758.1',
 264         '75.0.3758.0',
 265         '74.0.3729.66',
 266         '73.0.3683.106',
 267         '74.0.3729.65',
 268         '75.0.3757.1',
 269         '75.0.3757.0',
 270         '74.0.3729.64',
 271         '73.0.3683.105',
 272         '74.0.3729.63',
 273         '75.0.3756.1',
 274         '75.0.3756.0',
 275         '74.0.3729.62',
 276         '73.0.3683.104',
 277         '75.0.3755.3',
 278         '75.0.3755.2',
 279         '73.0.3683.103',
 280         '75.0.3755.1',
 281         '75.0.3755.0',
 282         '74.0.3729.61',
 283         '73.0.3683.102',
 284         '74.0.3729.60',
 285         '75.0.3754.2',
 286         '74.0.3729.59',
 287         '75.0.3753.4',
 288         '74.0.3729.58',
 289         '75.0.3754.1',
 290         '75.0.3754.0',
 291         '74.0.3729.57',
 292         '73.0.3683.101',
 293         '75.0.3753.3',
 294         '75.0.3752.2',
 295         '75.0.3753.2',
 296         '74.0.3729.56',
 297         '75.0.3753.1',
 298         '75.0.3753.0',
 299         '74.0.3729.55',
 300         '73.0.3683.100',
 301         '74.0.3729.54',
 302         '75.0.3752.1',
 303         '75.0.3752.0',
 304         '74.0.3729.53',
 305         '73.0.3683.99',
 306         '74.0.3729.52',
 307         '75.0.3751.1',
 308         '75.0.3751.0',
 309         '74.0.3729.51',
 310         '73.0.3683.98',
 311         '74.0.3729.50',
 312         '75.0.3750.0',
 313         '74.0.3729.49',
 314         '74.0.3729.48',
 315         '74.0.3729.47',
 316         '75.0.3749.3',
 317         '74.0.3729.46',
 318         '73.0.3683.97',
 319         '75.0.3749.2',
 320         '74.0.3729.45',
 321         '75.0.3749.1',
 322         '75.0.3749.0',
 323         '74.0.3729.44',
 324         '73.0.3683.96',
 325         '74.0.3729.43',
 326         '74.0.3729.42',
 327         '75.0.3748.1',
 328         '75.0.3748.0',
 329         '74.0.3729.41',
 330         '75.0.3747.1',
 331         '73.0.3683.95',
 332         '75.0.3746.4',
 333         '74.0.3729.40',
 334         '74.0.3729.39',
 335         '75.0.3747.0',
 336         '75.0.3746.3',
 337         '75.0.3746.2',
 338         '74.0.3729.38',
 339         '75.0.3746.1',
 340         '75.0.3746.0',
 341         '74.0.3729.37',
 342         '73.0.3683.94',
 343         '75.0.3745.5',
 344         '75.0.3745.4',
 345         '75.0.3745.3',
 346         '75.0.3745.2',
 347         '74.0.3729.36',
 348         '75.0.3745.1',
 349         '75.0.3745.0',
 350         '75.0.3744.2',
 351         '74.0.3729.35',
 352         '73.0.3683.93',
 353         '74.0.3729.34',
 354         '75.0.3744.1',
 355         '75.0.3744.0',
 356         '74.0.3729.33',
 357         '73.0.3683.92',
 358         '74.0.3729.32',
 359         '74.0.3729.31',
 360         '73.0.3683.91',
 361         '75.0.3741.2',
 362         '75.0.3740.5',
 363         '74.0.3729.30',
 364         '75.0.3741.1',
 365         '75.0.3741.0',
 366         '74.0.3729.29',
 367         '75.0.3740.4',
 368         '73.0.3683.90',
 369         '74.0.3729.28',
 370         '75.0.3740.3',
 371         '73.0.3683.89',
 372         '75.0.3740.2',
 373         '74.0.3729.27',
 374         '75.0.3740.1',
 375         '75.0.3740.0',
 376         '74.0.3729.26',
 377         '73.0.3683.88',
 378         '73.0.3683.87',
 379         '74.0.3729.25',
 380         '75.0.3739.1',
 381         '75.0.3739.0',
 382         '73.0.3683.86',
 383         '74.0.3729.24',
 384         '73.0.3683.85',
 385         '75.0.3738.4',
 386         '75.0.3738.3',
 387         '75.0.3738.2',
 388         '75.0.3738.1',
 389         '75.0.3738.0',
 390         '74.0.3729.23',
 391         '73.0.3683.84',
 392         '74.0.3729.22',
 393         '74.0.3729.21',
 394         '75.0.3737.1',
 395         '75.0.3737.0',
 396         '74.0.3729.20',
 397         '73.0.3683.83',
 398         '74.0.3729.19',
 399         '75.0.3736.1',
 400         '75.0.3736.0',
 401         '74.0.3729.18',
 402         '73.0.3683.82',
 403         '74.0.3729.17',
 404         '75.0.3735.1',
 405         '75.0.3735.0',
 406         '74.0.3729.16',
 407         '73.0.3683.81',
 408         '75.0.3734.1',
 409         '75.0.3734.0',
 410         '74.0.3729.15',
 411         '73.0.3683.80',
 412         '74.0.3729.14',
 413         '75.0.3733.1',
 414         '75.0.3733.0',
 415         '75.0.3732.1',
 416         '74.0.3729.13',
 417         '74.0.3729.12',
 418         '73.0.3683.79',
 419         '74.0.3729.11',
 420         '75.0.3732.0',
 421         '74.0.3729.10',
 422         '73.0.3683.78',
 423         '74.0.3729.9',
 424         '74.0.3729.8',
 425         '74.0.3729.7',
 426         '75.0.3731.3',
 427         '75.0.3731.2',
 428         '75.0.3731.0',
 429         '74.0.3729.6',
 430         '73.0.3683.77',
 431         '73.0.3683.76',
 432         '75.0.3730.5',
 433         '75.0.3730.4',
 434         '73.0.3683.75',
 435         '74.0.3729.5',
 436         '73.0.3683.74',
 437         '75.0.3730.3',
 438         '75.0.3730.2',
 439         '74.0.3729.4',
 440         '73.0.3683.73',
 441         '73.0.3683.72',
 442         '75.0.3730.1',
 443         '75.0.3730.0',
 444         '74.0.3729.3',
 445         '73.0.3683.71',
 446         '74.0.3729.2',
 447         '73.0.3683.70',
 448         '74.0.3729.1',
 449         '74.0.3729.0',
 450         '74.0.3726.4',
 451         '73.0.3683.69',
 452         '74.0.3726.3',
 453         '74.0.3728.0',
 454         '74.0.3726.2',
 455         '73.0.3683.68',
 456         '74.0.3726.1',
 457         '74.0.3726.0',
 458         '74.0.3725.4',
 459         '73.0.3683.67',
 460         '73.0.3683.66',
 461         '74.0.3725.3',
 462         '74.0.3725.2',
 463         '74.0.3725.1',
 464         '74.0.3724.8',
 465         '74.0.3725.0',
 466         '73.0.3683.65',
 467         '74.0.3724.7',
 468         '74.0.3724.6',
 469         '74.0.3724.5',
 470         '74.0.3724.4',
 471         '74.0.3724.3',
 472         '74.0.3724.2',
 473         '74.0.3724.1',
 474         '74.0.3724.0',
 475         '73.0.3683.64',
 476         '74.0.3723.1',
 477         '74.0.3723.0',
 478         '73.0.3683.63',
 479         '74.0.3722.1',
 480         '74.0.3722.0',
 481         '73.0.3683.62',
 482         '74.0.3718.9',
 483         '74.0.3702.3',
 484         '74.0.3721.3',
 485         '74.0.3721.2',
 486         '74.0.3721.1',
 487         '74.0.3721.0',
 488         '74.0.3720.6',
 489         '73.0.3683.61',
 490         '72.0.3626.122',
 491         '73.0.3683.60',
 492         '74.0.3720.5',
 493         '72.0.3626.121',
 494         '74.0.3718.8',
 495         '74.0.3720.4',
 496         '74.0.3720.3',
 497         '74.0.3718.7',
 498         '74.0.3720.2',
 499         '74.0.3720.1',
 500         '74.0.3720.0',
 501         '74.0.3718.6',
 502         '74.0.3719.5',
 503         '73.0.3683.59',
 504         '74.0.3718.5',
 505         '74.0.3718.4',
 506         '74.0.3719.4',
 507         '74.0.3719.3',
 508         '74.0.3719.2',
 509         '74.0.3719.1',
 510         '73.0.3683.58',
 511         '74.0.3719.0',
 512         '73.0.3683.57',
 513         '73.0.3683.56',
 514         '74.0.3718.3',
 515         '73.0.3683.55',
 516         '74.0.3718.2',
 517         '74.0.3718.1',
 518         '74.0.3718.0',
 519         '73.0.3683.54',
 520         '74.0.3717.2',
 521         '73.0.3683.53',
 522         '74.0.3717.1',
 523         '74.0.3717.0',
 524         '73.0.3683.52',
 525         '74.0.3716.1',
 526         '74.0.3716.0',
 527         '73.0.3683.51',
 528         '74.0.3715.1',
 529         '74.0.3715.0',
 530         '73.0.3683.50',
 531         '74.0.3711.2',
 532         '74.0.3714.2',
 533         '74.0.3713.3',
 534         '74.0.3714.1',
 535         '74.0.3714.0',
 536         '73.0.3683.49',
 537         '74.0.3713.1',
 538         '74.0.3713.0',
 539         '72.0.3626.120',
 540         '73.0.3683.48',
 541         '74.0.3712.2',
 542         '74.0.3712.1',
 543         '74.0.3712.0',
 544         '73.0.3683.47',
 545         '72.0.3626.119',
 546         '73.0.3683.46',
 547         '74.0.3710.2',
 548         '72.0.3626.118',
 549         '74.0.3711.1',
 550         '74.0.3711.0',
 551         '73.0.3683.45',
 552         '72.0.3626.117',
 553         '74.0.3710.1',
 554         '74.0.3710.0',
 555         '73.0.3683.44',
 556         '72.0.3626.116',
 557         '74.0.3709.1',
 558         '74.0.3709.0',
 559         '74.0.3704.9',
 560         '73.0.3683.43',
 561         '72.0.3626.115',
 562         '74.0.3704.8',
 563         '74.0.3704.7',
 564         '74.0.3708.0',
 565         '74.0.3706.7',
 566         '74.0.3704.6',
 567         '73.0.3683.42',
 568         '72.0.3626.114',
 569         '74.0.3706.6',
 570         '72.0.3626.113',
 571         '74.0.3704.5',
 572         '74.0.3706.5',
 573         '74.0.3706.4',
 574         '74.0.3706.3',
 575         '74.0.3706.2',
 576         '74.0.3706.1',
 577         '74.0.3706.0',
 578         '73.0.3683.41',
 579         '72.0.3626.112',
 580         '74.0.3705.1',
 581         '74.0.3705.0',
 582         '73.0.3683.40',
 583         '72.0.3626.111',
 584         '73.0.3683.39',
 585         '74.0.3704.4',
 586         '73.0.3683.38',
 587         '74.0.3704.3',
 588         '74.0.3704.2',
 589         '74.0.3704.1',
 590         '74.0.3704.0',
 591         '73.0.3683.37',
 592         '72.0.3626.110',
 593         '72.0.3626.109',
 594         '74.0.3703.3',
 595         '74.0.3703.2',
 596         '73.0.3683.36',
 597         '74.0.3703.1',
 598         '74.0.3703.0',
 599         '73.0.3683.35',
 600         '72.0.3626.108',
 601         '74.0.3702.2',
 602         '74.0.3699.3',
 603         '74.0.3702.1',
 604         '74.0.3702.0',
 605         '73.0.3683.34',
 606         '72.0.3626.107',
 607         '73.0.3683.33',
 608         '74.0.3701.1',
 609         '74.0.3701.0',
 610         '73.0.3683.32',
 611         '73.0.3683.31',
 612         '72.0.3626.105',
 613         '74.0.3700.1',
 614         '74.0.3700.0',
 615         '73.0.3683.29',
 616         '72.0.3626.103',
 617         '74.0.3699.2',
 618         '74.0.3699.1',
 619         '74.0.3699.0',
 620         '73.0.3683.28',
 621         '72.0.3626.102',
 622         '73.0.3683.27',
 623         '73.0.3683.26',
 624         '74.0.3698.0',
 625         '74.0.3696.2',
 626         '72.0.3626.101',
 627         '73.0.3683.25',
 628         '74.0.3696.1',
 629         '74.0.3696.0',
 630         '74.0.3694.8',
 631         '72.0.3626.100',
 632         '74.0.3694.7',
 633         '74.0.3694.6',
 634         '74.0.3694.5',
 635         '74.0.3694.4',
 636         '72.0.3626.99',
 637         '72.0.3626.98',
 638         '74.0.3694.3',
 639         '73.0.3683.24',
 640         '72.0.3626.97',
 641         '72.0.3626.96',
 642         '72.0.3626.95',
 643         '73.0.3683.23',
 644         '72.0.3626.94',
 645         '73.0.3683.22',
 646         '73.0.3683.21',
 647         '72.0.3626.93',
 648         '74.0.3694.2',
 649         '72.0.3626.92',
 650         '74.0.3694.1',
 651         '74.0.3694.0',
 652         '74.0.3693.6',
 653         '73.0.3683.20',
 654         '72.0.3626.91',
 655         '74.0.3693.5',
 656         '74.0.3693.4',
 657         '74.0.3693.3',
 658         '74.0.3693.2',
 659         '73.0.3683.19',
 660         '74.0.3693.1',
 661         '74.0.3693.0',
 662         '73.0.3683.18',
 663         '72.0.3626.90',
 664         '74.0.3692.1',
 665         '74.0.3692.0',
 666         '73.0.3683.17',
 667         '72.0.3626.89',
 668         '74.0.3687.3',
 669         '74.0.3691.1',
 670         '74.0.3691.0',
 671         '73.0.3683.16',
 672         '72.0.3626.88',
 673         '72.0.3626.87',
 674         '73.0.3683.15',
 675         '74.0.3690.1',
 676         '74.0.3690.0',
 677         '73.0.3683.14',
 678         '72.0.3626.86',
 679         '73.0.3683.13',
 680         '73.0.3683.12',
 681         '74.0.3689.1',
 682         '74.0.3689.0',
 683         '73.0.3683.11',
 684         '72.0.3626.85',
 685         '73.0.3683.10',
 686         '72.0.3626.84',
 687         '73.0.3683.9',
 688         '74.0.3688.1',
 689         '74.0.3688.0',
 690         '73.0.3683.8',
 691         '72.0.3626.83',
 692         '74.0.3687.2',
 693         '74.0.3687.1',
 694         '74.0.3687.0',
 695         '73.0.3683.7',
 696         '72.0.3626.82',
 697         '74.0.3686.4',
 698         '72.0.3626.81',
 699         '74.0.3686.3',
 700         '74.0.3686.2',
 701         '74.0.3686.1',
 702         '74.0.3686.0',
 703         '73.0.3683.6',
 704         '72.0.3626.80',
 705         '74.0.3685.1',
 706         '74.0.3685.0',
 707         '73.0.3683.5',
 708         '72.0.3626.79',
 709         '74.0.3684.1',
 710         '74.0.3684.0',
 711         '73.0.3683.4',
 712         '72.0.3626.78',
 713         '72.0.3626.77',
 714         '73.0.3683.3',
 715         '73.0.3683.2',
 716         '72.0.3626.76',
 717         '73.0.3683.1',
 718         '73.0.3683.0',
 719         '72.0.3626.75',
 720         '71.0.3578.141',
 721         '73.0.3682.1',
 722         '73.0.3682.0',
 723         '72.0.3626.74',
 724         '71.0.3578.140',
 725         '73.0.3681.4',
 726         '73.0.3681.3',
 727         '73.0.3681.2',
 728         '73.0.3681.1',
 729         '73.0.3681.0',
 730         '72.0.3626.73',
 731         '71.0.3578.139',
 732         '72.0.3626.72',
 733         '72.0.3626.71',
 734         '73.0.3680.1',
 735         '73.0.3680.0',
 736         '72.0.3626.70',
 737         '71.0.3578.138',
 738         '73.0.3678.2',
 739         '73.0.3679.1',
 740         '73.0.3679.0',
 741         '72.0.3626.69',
 742         '71.0.3578.137',
 743         '73.0.3678.1',
 744         '73.0.3678.0',
 745         '71.0.3578.136',
 746         '73.0.3677.1',
 747         '73.0.3677.0',
 748         '72.0.3626.68',
 749         '72.0.3626.67',
 750         '71.0.3578.135',
 751         '73.0.3676.1',
 752         '73.0.3676.0',
 753         '73.0.3674.2',
 754         '72.0.3626.66',
 755         '71.0.3578.134',
 756         '73.0.3674.1',
 757         '73.0.3674.0',
 758         '72.0.3626.65',
 759         '71.0.3578.133',
 760         '73.0.3673.2',
 761         '73.0.3673.1',
 762         '73.0.3673.0',
 763         '72.0.3626.64',
 764         '71.0.3578.132',
 765         '72.0.3626.63',
 766         '72.0.3626.62',
 767         '72.0.3626.61',
 768         '72.0.3626.60',
 769         '73.0.3672.1',
 770         '73.0.3672.0',
 771         '72.0.3626.59',
 772         '71.0.3578.131',
 773         '73.0.3671.3',
 774         '73.0.3671.2',
 775         '73.0.3671.1',
 776         '73.0.3671.0',
 777         '72.0.3626.58',
 778         '71.0.3578.130',
 779         '73.0.3670.1',
 780         '73.0.3670.0',
 781         '72.0.3626.57',
 782         '71.0.3578.129',
 783         '73.0.3669.1',
 784         '73.0.3669.0',
 785         '72.0.3626.56',
 786         '71.0.3578.128',
 787         '73.0.3668.2',
 788         '73.0.3668.1',
 789         '73.0.3668.0',
 790         '72.0.3626.55',
 791         '71.0.3578.127',
 792         '73.0.3667.2',
 793         '73.0.3667.1',
 794         '73.0.3667.0',
 795         '72.0.3626.54',
 796         '71.0.3578.126',
 797         '73.0.3666.1',
 798         '73.0.3666.0',
 799         '72.0.3626.53',
 800         '71.0.3578.125',
 801         '73.0.3665.4',
 802         '73.0.3665.3',
 803         '72.0.3626.52',
 804         '73.0.3665.2',
 805         '73.0.3664.4',
 806         '73.0.3665.1',
 807         '73.0.3665.0',
 808         '72.0.3626.51',
 809         '71.0.3578.124',
 810         '72.0.3626.50',
 811         '73.0.3664.3',
 812         '73.0.3664.2',
 813         '73.0.3664.1',
 814         '73.0.3664.0',
 815         '73.0.3663.2',
 816         '72.0.3626.49',
 817         '71.0.3578.123',
 818         '73.0.3663.1',
 819         '73.0.3663.0',
 820         '72.0.3626.48',
 821         '71.0.3578.122',
 822         '73.0.3662.1',
 823         '73.0.3662.0',
 824         '72.0.3626.47',
 825         '71.0.3578.121',
 826         '73.0.3661.1',
 827         '72.0.3626.46',
 828         '73.0.3661.0',
 829         '72.0.3626.45',
 830         '71.0.3578.120',
 831         '73.0.3660.2',
 832         '73.0.3660.1',
 833         '73.0.3660.0',
 834         '72.0.3626.44',
 835         '71.0.3578.119',
 836         '73.0.3659.1',
 837         '73.0.3659.0',
 838         '72.0.3626.43',
 839         '71.0.3578.118',
 840         '73.0.3658.1',
 841         '73.0.3658.0',
 842         '72.0.3626.42',
 843         '71.0.3578.117',
 844         '73.0.3657.1',
 845         '73.0.3657.0',
 846         '72.0.3626.41',
 847         '71.0.3578.116',
 848         '73.0.3656.1',
 849         '73.0.3656.0',
 850         '72.0.3626.40',
 851         '71.0.3578.115',
 852         '73.0.3655.1',
 853         '73.0.3655.0',
 854         '72.0.3626.39',
 855         '71.0.3578.114',
 856         '73.0.3654.1',
 857         '73.0.3654.0',
 858         '72.0.3626.38',
 859         '71.0.3578.113',
 860         '73.0.3653.1',
 861         '73.0.3653.0',
 862         '72.0.3626.37',
 863         '71.0.3578.112',
 864         '73.0.3652.1',
 865         '73.0.3652.0',
 866         '72.0.3626.36',
 867         '71.0.3578.111',
 868         '73.0.3651.1',
 869         '73.0.3651.0',
 870         '72.0.3626.35',
 871         '71.0.3578.110',
 872         '73.0.3650.1',
 873         '73.0.3650.0',
 874         '72.0.3626.34',
 875         '71.0.3578.109',
 876         '73.0.3649.1',
 877         '73.0.3649.0',
 878         '72.0.3626.33',
 879         '71.0.3578.108',
 880         '73.0.3648.2',
 881         '73.0.3648.1',
 882         '73.0.3648.0',
 883         '72.0.3626.32',
 884         '71.0.3578.107',
 885         '73.0.3647.2',
 886         '73.0.3647.1',
 887         '73.0.3647.0',
 888         '72.0.3626.31',
 889         '71.0.3578.106',
 890         '73.0.3635.3',
 891         '73.0.3646.2',
 892         '73.0.3646.1',
 893         '73.0.3646.0',
 894         '72.0.3626.30',
 895         '71.0.3578.105',
 896         '72.0.3626.29',
 897         '73.0.3645.2',
 898         '73.0.3645.1',
 899         '73.0.3645.0',
 900         '72.0.3626.28',
 901         '71.0.3578.104',
 902         '72.0.3626.27',
 903         '72.0.3626.26',
 904         '72.0.3626.25',
 905         '72.0.3626.24',
 906         '73.0.3644.0',
 907         '73.0.3643.2',
 908         '72.0.3626.23',
 909         '71.0.3578.103',
 910         '73.0.3643.1',
 911         '73.0.3643.0',
 912         '72.0.3626.22',
 913         '71.0.3578.102',
 914         '73.0.3642.1',
 915         '73.0.3642.0',
 916         '72.0.3626.21',
 917         '71.0.3578.101',
 918         '73.0.3641.1',
 919         '73.0.3641.0',
 920         '72.0.3626.20',
 921         '71.0.3578.100',
 922         '72.0.3626.19',
 923         '73.0.3640.1',
 924         '73.0.3640.0',
 925         '72.0.3626.18',
 926         '73.0.3639.1',
 927         '71.0.3578.99',
 928         '73.0.3639.0',
 929         '72.0.3626.17',
 930         '73.0.3638.2',
 931         '72.0.3626.16',
 932         '73.0.3638.1',
 933         '73.0.3638.0',
 934         '72.0.3626.15',
 935         '71.0.3578.98',
 936         '73.0.3635.2',
 937         '71.0.3578.97',
 938         '73.0.3637.1',
 939         '73.0.3637.0',
 940         '72.0.3626.14',
 941         '71.0.3578.96',
 942         '71.0.3578.95',
 943         '72.0.3626.13',
 944         '71.0.3578.94',
 945         '73.0.3636.2',
 946         '71.0.3578.93',
 947         '73.0.3636.1',
 948         '73.0.3636.0',
 949         '72.0.3626.12',
 950         '71.0.3578.92',
 951         '73.0.3635.1',
 952         '73.0.3635.0',
 953         '72.0.3626.11',
 954         '71.0.3578.91',
 955         '73.0.3634.2',
 956         '73.0.3634.1',
 957         '73.0.3634.0',
 958         '72.0.3626.10',
 959         '71.0.3578.90',
 960         '71.0.3578.89',
 961         '73.0.3633.2',
 962         '73.0.3633.1',
 963         '73.0.3633.0',
 964         '72.0.3610.4',
 965         '72.0.3626.9',
 966         '71.0.3578.88',
 967         '73.0.3632.5',
 968         '73.0.3632.4',
 969         '73.0.3632.3',
 970         '73.0.3632.2',
 971         '73.0.3632.1',
 972         '73.0.3632.0',
 973         '72.0.3626.8',
 974         '71.0.3578.87',
 975         '73.0.3631.2',
 976         '73.0.3631.1',
 977         '73.0.3631.0',
 978         '72.0.3626.7',
 979         '71.0.3578.86',
 980         '72.0.3626.6',
 981         '73.0.3630.1',
 982         '73.0.3630.0',
 983         '72.0.3626.5',
 984         '71.0.3578.85',
 985         '72.0.3626.4',
 986         '73.0.3628.3',
 987         '73.0.3628.2',
 988         '73.0.3629.1',
 989         '73.0.3629.0',
 990         '72.0.3626.3',
 991         '71.0.3578.84',
 992         '73.0.3628.1',
 993         '73.0.3628.0',
 994         '71.0.3578.83',
 995         '73.0.3627.1',
 996         '73.0.3627.0',
 997         '72.0.3626.2',
 998         '71.0.3578.82',
 999         '71.0.3578.81',
1000         '71.0.3578.80',
1001         '72.0.3626.1',
1002         '72.0.3626.0',
1003         '71.0.3578.79',
1004         '70.0.3538.124',
1005         '71.0.3578.78',
1006         '72.0.3623.4',
1007         '72.0.3625.2',
1008         '72.0.3625.1',
1009         '72.0.3625.0',
1010         '71.0.3578.77',
1011         '70.0.3538.123',
1012         '72.0.3624.4',
1013         '72.0.3624.3',
1014         '72.0.3624.2',
1015         '71.0.3578.76',
1016         '72.0.3624.1',
1017         '72.0.3624.0',
1018         '72.0.3623.3',
1019         '71.0.3578.75',
1020         '70.0.3538.122',
1021         '71.0.3578.74',
1022         '72.0.3623.2',
1023         '72.0.3610.3',
1024         '72.0.3623.1',
1025         '72.0.3623.0',
1026         '72.0.3622.3',
1027         '72.0.3622.2',
1028         '71.0.3578.73',
1029         '70.0.3538.121',
1030         '72.0.3622.1',
1031         '72.0.3622.0',
1032         '71.0.3578.72',
1033         '70.0.3538.120',
1034         '72.0.3621.1',
1035         '72.0.3621.0',
1036         '71.0.3578.71',
1037         '70.0.3538.119',
1038         '72.0.3620.1',
1039         '72.0.3620.0',
1040         '71.0.3578.70',
1041         '70.0.3538.118',
1042         '71.0.3578.69',
1043         '72.0.3619.1',
1044         '72.0.3619.0',
1045         '71.0.3578.68',
1046         '70.0.3538.117',
1047         '71.0.3578.67',
1048         '72.0.3618.1',
1049         '72.0.3618.0',
1050         '71.0.3578.66',
1051         '70.0.3538.116',
1052         '72.0.3617.1',
1053         '72.0.3617.0',
1054         '71.0.3578.65',
1055         '70.0.3538.115',
1056         '72.0.3602.3',
1057         '71.0.3578.64',
1058         '72.0.3616.1',
1059         '72.0.3616.0',
1060         '71.0.3578.63',
1061         '70.0.3538.114',
1062         '71.0.3578.62',
1063         '72.0.3615.1',
1064         '72.0.3615.0',
1065         '71.0.3578.61',
1066         '70.0.3538.113',
1067         '72.0.3614.1',
1068         '72.0.3614.0',
1069         '71.0.3578.60',
1070         '70.0.3538.112',
1071         '72.0.3613.1',
1072         '72.0.3613.0',
1073         '71.0.3578.59',
1074         '70.0.3538.111',
1075         '72.0.3612.2',
1076         '72.0.3612.1',
1077         '72.0.3612.0',
1078         '70.0.3538.110',
1079         '71.0.3578.58',
1080         '70.0.3538.109',
1081         '72.0.3611.2',
1082         '72.0.3611.1',
1083         '72.0.3611.0',
1084         '71.0.3578.57',
1085         '70.0.3538.108',
1086         '72.0.3610.2',
1087         '71.0.3578.56',
1088         '71.0.3578.55',
1089         '72.0.3610.1',
1090         '72.0.3610.0',
1091         '71.0.3578.54',
1092         '70.0.3538.107',
1093         '71.0.3578.53',
1094         '72.0.3609.3',
1095         '71.0.3578.52',
1096         '72.0.3609.2',
1097         '71.0.3578.51',
1098         '72.0.3608.5',
1099         '72.0.3609.1',
1100         '72.0.3609.0',
1101         '71.0.3578.50',
1102         '70.0.3538.106',
1103         '72.0.3608.4',
1104         '72.0.3608.3',
1105         '72.0.3608.2',
1106         '71.0.3578.49',
1107         '72.0.3608.1',
1108         '72.0.3608.0',
1109         '70.0.3538.105',
1110         '71.0.3578.48',
1111         '72.0.3607.1',
1112         '72.0.3607.0',
1113         '71.0.3578.47',
1114         '70.0.3538.104',
1115         '72.0.3606.2',
1116         '72.0.3606.1',
1117         '72.0.3606.0',
1118         '71.0.3578.46',
1119         '70.0.3538.103',
1120         '70.0.3538.102',
1121         '72.0.3605.3',
1122         '72.0.3605.2',
1123         '72.0.3605.1',
1124         '72.0.3605.0',
1125         '71.0.3578.45',
1126         '70.0.3538.101',
1127         '71.0.3578.44',
1128         '71.0.3578.43',
1129         '70.0.3538.100',
1130         '70.0.3538.99',
1131         '71.0.3578.42',
1132         '72.0.3604.1',
1133         '72.0.3604.0',
1134         '71.0.3578.41',
1135         '70.0.3538.98',
1136         '71.0.3578.40',
1137         '72.0.3603.2',
1138         '72.0.3603.1',
1139         '72.0.3603.0',
1140         '71.0.3578.39',
1141         '70.0.3538.97',
1142         '72.0.3602.2',
1143         '71.0.3578.38',
1144         '71.0.3578.37',
1145         '72.0.3602.1',
1146         '72.0.3602.0',
1147         '71.0.3578.36',
1148         '70.0.3538.96',
1149         '72.0.3601.1',
1150         '72.0.3601.0',
1151         '71.0.3578.35',
1152         '70.0.3538.95',
1153         '72.0.3600.1',
1154         '72.0.3600.0',
1155         '71.0.3578.34',
1156         '70.0.3538.94',
1157         '72.0.3599.3',
1158         '72.0.3599.2',
1159         '72.0.3599.1',
1160         '72.0.3599.0',
1161         '71.0.3578.33',
1162         '70.0.3538.93',
1163         '72.0.3598.1',
1164         '72.0.3598.0',
1165         '71.0.3578.32',
1166         '70.0.3538.87',
1167         '72.0.3597.1',
1168         '72.0.3597.0',
1169         '72.0.3596.2',
1170         '71.0.3578.31',
1171         '70.0.3538.86',
1172         '71.0.3578.30',
1173         '71.0.3578.29',
1174         '72.0.3596.1',
1175         '72.0.3596.0',
1176         '71.0.3578.28',
1177         '70.0.3538.85',
1178         '72.0.3595.2',
1179         '72.0.3591.3',
1180         '72.0.3595.1',
1181         '72.0.3595.0',
1182         '71.0.3578.27',
1183         '70.0.3538.84',
1184         '72.0.3594.1',
1185         '72.0.3594.0',
1186         '71.0.3578.26',
1187         '70.0.3538.83',
1188         '72.0.3593.2',
1189         '72.0.3593.1',
1190         '72.0.3593.0',
1191         '71.0.3578.25',
1192         '70.0.3538.82',
1193         '72.0.3589.3',
1194         '72.0.3592.2',
1195         '72.0.3592.1',
1196         '72.0.3592.0',
1197         '71.0.3578.24',
1198         '72.0.3589.2',
1199         '70.0.3538.81',
1200         '70.0.3538.80',
1201         '72.0.3591.2',
1202         '72.0.3591.1',
1203         '72.0.3591.0',
1204         '71.0.3578.23',
1205         '70.0.3538.79',
1206         '71.0.3578.22',
1207         '72.0.3590.1',
1208         '72.0.3590.0',
1209         '71.0.3578.21',
1210         '70.0.3538.78',
1211         '70.0.3538.77',
1212         '72.0.3589.1',
1213         '72.0.3589.0',
1214         '71.0.3578.20',
1215         '70.0.3538.76',
1216         '71.0.3578.19',
1217         '70.0.3538.75',
1218         '72.0.3588.1',
1219         '72.0.3588.0',
1220         '71.0.3578.18',
1221         '70.0.3538.74',
1222         '72.0.3586.2',
1223         '72.0.3587.0',
1224         '71.0.3578.17',
1225         '70.0.3538.73',
1226         '72.0.3586.1',
1227         '72.0.3586.0',
1228         '71.0.3578.16',
1229         '70.0.3538.72',
1230         '72.0.3585.1',
1231         '72.0.3585.0',
1232         '71.0.3578.15',
1233         '70.0.3538.71',
1234         '71.0.3578.14',
1235         '72.0.3584.1',
1236         '72.0.3584.0',
1237         '71.0.3578.13',
1238         '70.0.3538.70',
1239         '72.0.3583.2',
1240         '71.0.3578.12',
1241         '72.0.3583.1',
1242         '72.0.3583.0',
1243         '71.0.3578.11',
1244         '70.0.3538.69',
1245         '71.0.3578.10',
1246         '72.0.3582.0',
1247         '72.0.3581.4',
1248         '71.0.3578.9',
1249         '70.0.3538.67',
1250         '72.0.3581.3',
1251         '72.0.3581.2',
1252         '72.0.3581.1',
1253         '72.0.3581.0',
1254         '71.0.3578.8',
1255         '70.0.3538.66',
1256         '72.0.3580.1',
1257         '72.0.3580.0',
1258         '71.0.3578.7',
1259         '70.0.3538.65',
1260         '71.0.3578.6',
1261         '72.0.3579.1',
1262         '72.0.3579.0',
1263         '71.0.3578.5',
1264         '70.0.3538.64',
1265         '71.0.3578.4',
1266         '71.0.3578.3',
1267         '71.0.3578.2',
1268         '71.0.3578.1',
1269         '71.0.3578.0',
1270         '70.0.3538.63',
1271         '69.0.3497.128',
1272         '70.0.3538.62',
1273         '70.0.3538.61',
1274         '70.0.3538.60',
1275         '70.0.3538.59',
1276         '71.0.3577.1',
1277         '71.0.3577.0',
1278         '70.0.3538.58',
1279         '69.0.3497.127',
1280         '71.0.3576.2',
1281         '71.0.3576.1',
1282         '71.0.3576.0',
1283         '70.0.3538.57',
1284         '70.0.3538.56',
1285         '71.0.3575.2',
1286         '70.0.3538.55',
1287         '69.0.3497.126',
1288         '70.0.3538.54',
1289         '71.0.3575.1',
1290         '71.0.3575.0',
1291         '71.0.3574.1',
1292         '71.0.3574.0',
1293         '70.0.3538.53',
1294         '69.0.3497.125',
1295         '70.0.3538.52',
1296         '71.0.3573.1',
1297         '71.0.3573.0',
1298         '70.0.3538.51',
1299         '69.0.3497.124',
1300         '71.0.3572.1',
1301         '71.0.3572.0',
1302         '70.0.3538.50',
1303         '69.0.3497.123',
1304         '71.0.3571.2',
1305         '70.0.3538.49',
1306         '69.0.3497.122',
1307         '71.0.3571.1',
1308         '71.0.3571.0',
1309         '70.0.3538.48',
1310         '69.0.3497.121',
1311         '71.0.3570.1',
1312         '71.0.3570.0',
1313         '70.0.3538.47',
1314         '69.0.3497.120',
1315         '71.0.3568.2',
1316         '71.0.3569.1',
1317         '71.0.3569.0',
1318         '70.0.3538.46',
1319         '69.0.3497.119',
1320         '70.0.3538.45',
1321         '71.0.3568.1',
1322         '71.0.3568.0',
1323         '70.0.3538.44',
1324         '69.0.3497.118',
1325         '70.0.3538.43',
1326         '70.0.3538.42',
1327         '71.0.3567.1',
1328         '71.0.3567.0',
1329         '70.0.3538.41',
1330         '69.0.3497.117',
1331         '71.0.3566.1',
1332         '71.0.3566.0',
1333         '70.0.3538.40',
1334         '69.0.3497.116',
1335         '71.0.3565.1',
1336         '71.0.3565.0',
1337         '70.0.3538.39',
1338         '69.0.3497.115',
1339         '71.0.3564.1',
1340         '71.0.3564.0',
1341         '70.0.3538.38',
1342         '69.0.3497.114',
1343         '71.0.3563.0',
1344         '71.0.3562.2',
1345         '70.0.3538.37',
1346         '69.0.3497.113',
1347         '70.0.3538.36',
1348         '70.0.3538.35',
1349         '71.0.3562.1',
1350         '71.0.3562.0',
1351         '70.0.3538.34',
1352         '69.0.3497.112',
1353         '70.0.3538.33',
1354         '71.0.3561.1',
1355         '71.0.3561.0',
1356         '70.0.3538.32',
1357         '69.0.3497.111',
1358         '71.0.3559.6',
1359         '71.0.3560.1',
1360         '71.0.3560.0',
1361         '71.0.3559.5',
1362         '71.0.3559.4',
1363         '70.0.3538.31',
1364         '69.0.3497.110',
1365         '71.0.3559.3',
1366         '70.0.3538.30',
1367         '69.0.3497.109',
1368         '71.0.3559.2',
1369         '71.0.3559.1',
1370         '71.0.3559.0',
1371         '70.0.3538.29',
1372         '69.0.3497.108',
1373         '71.0.3558.2',
1374         '71.0.3558.1',
1375         '71.0.3558.0',
1376         '70.0.3538.28',
1377         '69.0.3497.107',
1378         '71.0.3557.2',
1379         '71.0.3557.1',
1380         '71.0.3557.0',
1381         '70.0.3538.27',
1382         '69.0.3497.106',
1383         '71.0.3554.4',
1384         '70.0.3538.26',
1385         '71.0.3556.1',
1386         '71.0.3556.0',
1387         '70.0.3538.25',
1388         '71.0.3554.3',
1389         '69.0.3497.105',
1390         '71.0.3554.2',
1391         '70.0.3538.24',
1392         '69.0.3497.104',
1393         '71.0.3555.2',
1394         '70.0.3538.23',
1395         '71.0.3555.1',
1396         '71.0.3555.0',
1397         '70.0.3538.22',
1398         '69.0.3497.103',
1399         '71.0.3554.1',
1400         '71.0.3554.0',
1401         '70.0.3538.21',
1402         '69.0.3497.102',
1403         '71.0.3553.3',
1404         '70.0.3538.20',
1405         '69.0.3497.101',
1406         '71.0.3553.2',
1407         '69.0.3497.100',
1408         '71.0.3553.1',
1409         '71.0.3553.0',
1410         '70.0.3538.19',
1411         '69.0.3497.99',
1412         '69.0.3497.98',
1413         '69.0.3497.97',
1414         '71.0.3552.6',
1415         '71.0.3552.5',
1416         '71.0.3552.4',
1417         '71.0.3552.3',
1418         '71.0.3552.2',
1419         '71.0.3552.1',
1420         '71.0.3552.0',
1421         '70.0.3538.18',
1422         '69.0.3497.96',
1423         '71.0.3551.3',
1424         '71.0.3551.2',
1425         '71.0.3551.1',
1426         '71.0.3551.0',
1427         '70.0.3538.17',
1428         '69.0.3497.95',
1429         '71.0.3550.3',
1430         '71.0.3550.2',
1431         '71.0.3550.1',
1432         '71.0.3550.0',
1433         '70.0.3538.16',
1434         '69.0.3497.94',
1435         '71.0.3549.1',
1436         '71.0.3549.0',
1437         '70.0.3538.15',
1438         '69.0.3497.93',
1439         '69.0.3497.92',
1440         '71.0.3548.1',
1441         '71.0.3548.0',
1442         '70.0.3538.14',
1443         '69.0.3497.91',
1444         '71.0.3547.1',
1445         '71.0.3547.0',
1446         '70.0.3538.13',
1447         '69.0.3497.90',
1448         '71.0.3546.2',
1449         '69.0.3497.89',
1450         '71.0.3546.1',
1451         '71.0.3546.0',
1452         '70.0.3538.12',
1453         '69.0.3497.88',
1454         '71.0.3545.4',
1455         '71.0.3545.3',
1456         '71.0.3545.2',
1457         '71.0.3545.1',
1458         '71.0.3545.0',
1459         '70.0.3538.11',
1460         '69.0.3497.87',
1461         '71.0.3544.5',
1462         '71.0.3544.4',
1463         '71.0.3544.3',
1464         '71.0.3544.2',
1465         '71.0.3544.1',
1466         '71.0.3544.0',
1467         '69.0.3497.86',
1468         '70.0.3538.10',
1469         '69.0.3497.85',
1470         '70.0.3538.9',
1471         '69.0.3497.84',
1472         '71.0.3543.4',
1473         '70.0.3538.8',
1474         '71.0.3543.3',
1475         '71.0.3543.2',
1476         '71.0.3543.1',
1477         '71.0.3543.0',
1478         '70.0.3538.7',
1479         '69.0.3497.83',
1480         '71.0.3542.2',
1481         '71.0.3542.1',
1482         '71.0.3542.0',
1483         '70.0.3538.6',
1484         '69.0.3497.82',
1485         '69.0.3497.81',
1486         '71.0.3541.1',
1487         '71.0.3541.0',
1488         '70.0.3538.5',
1489         '69.0.3497.80',
1490         '71.0.3540.1',
1491         '71.0.3540.0',
1492         '70.0.3538.4',
1493         '69.0.3497.79',
1494         '70.0.3538.3',
1495         '71.0.3539.1',
1496         '71.0.3539.0',
1497         '69.0.3497.78',
1498         '68.0.3440.134',
1499         '69.0.3497.77',
1500         '70.0.3538.2',
1501         '70.0.3538.1',
1502         '70.0.3538.0',
1503         '69.0.3497.76',
1504         '68.0.3440.133',
1505         '69.0.3497.75',
1506         '70.0.3537.2',
1507         '70.0.3537.1',
1508         '70.0.3537.0',
1509         '69.0.3497.74',
1510         '68.0.3440.132',
1511         '70.0.3536.0',
1512         '70.0.3535.5',
1513         '70.0.3535.4',
1514         '70.0.3535.3',
1515         '69.0.3497.73',
1516         '68.0.3440.131',
1517         '70.0.3532.8',
1518         '70.0.3532.7',
1519         '69.0.3497.72',
1520         '69.0.3497.71',
1521         '70.0.3535.2',
1522         '70.0.3535.1',
1523         '70.0.3535.0',
1524         '69.0.3497.70',
1525         '68.0.3440.130',
1526         '69.0.3497.69',
1527         '68.0.3440.129',
1528         '70.0.3534.4',
1529         '70.0.3534.3',
1530         '70.0.3534.2',
1531         '70.0.3534.1',
1532         '70.0.3534.0',
1533         '69.0.3497.68',
1534         '68.0.3440.128',
1535         '70.0.3533.2',
1536         '70.0.3533.1',
1537         '70.0.3533.0',
1538         '69.0.3497.67',
1539         '68.0.3440.127',
1540         '70.0.3532.6',
1541         '70.0.3532.5',
1542         '70.0.3532.4',
1543         '69.0.3497.66',
1544         '68.0.3440.126',
1545         '70.0.3532.3',
1546         '70.0.3532.2',
1547         '70.0.3532.1',
1548         '69.0.3497.60',
1549         '69.0.3497.65',
1550         '69.0.3497.64',
1551         '70.0.3532.0',
1552         '70.0.3531.0',
1553         '70.0.3530.4',
1554         '70.0.3530.3',
1555         '70.0.3530.2',
1556         '69.0.3497.58',
1557         '68.0.3440.125',
1558         '69.0.3497.57',
1559         '69.0.3497.56',
1560         '69.0.3497.55',
1561         '69.0.3497.54',
1562         '70.0.3530.1',
1563         '70.0.3530.0',
1564         '69.0.3497.53',
1565         '68.0.3440.124',
1566         '69.0.3497.52',
1567         '70.0.3529.3',
1568         '70.0.3529.2',
1569         '70.0.3529.1',
1570         '70.0.3529.0',
1571         '69.0.3497.51',
1572         '70.0.3528.4',
1573         '68.0.3440.123',
1574         '70.0.3528.3',
1575         '70.0.3528.2',
1576         '70.0.3528.1',
1577         '70.0.3528.0',
1578         '69.0.3497.50',
1579         '68.0.3440.122',
1580         '70.0.3527.1',
1581         '70.0.3527.0',
1582         '69.0.3497.49',
1583         '68.0.3440.121',
1584         '70.0.3526.1',
1585         '70.0.3526.0',
1586         '68.0.3440.120',
1587         '69.0.3497.48',
1588         '69.0.3497.47',
1589         '68.0.3440.119',
1590         '68.0.3440.118',
1591         '70.0.3525.5',
1592         '70.0.3525.4',
1593         '70.0.3525.3',
1594         '68.0.3440.117',
1595         '69.0.3497.46',
1596         '70.0.3525.2',
1597         '70.0.3525.1',
1598         '70.0.3525.0',
1599         '69.0.3497.45',
1600         '68.0.3440.116',
1601         '70.0.3524.4',
1602         '70.0.3524.3',
1603         '69.0.3497.44',
1604         '70.0.3524.2',
1605         '70.0.3524.1',
1606         '70.0.3524.0',
1607         '70.0.3523.2',
1608         '69.0.3497.43',
1609         '68.0.3440.115',
1610         '70.0.3505.9',
1611         '69.0.3497.42',
1612         '70.0.3505.8',
1613         '70.0.3523.1',
1614         '70.0.3523.0',
1615         '69.0.3497.41',
1616         '68.0.3440.114',
1617         '70.0.3505.7',
1618         '69.0.3497.40',
1619         '70.0.3522.1',
1620         '70.0.3522.0',
1621         '70.0.3521.2',
1622         '69.0.3497.39',
1623         '68.0.3440.113',
1624         '70.0.3505.6',
1625         '70.0.3521.1',
1626         '70.0.3521.0',
1627         '69.0.3497.38',
1628         '68.0.3440.112',
1629         '70.0.3520.1',
1630         '70.0.3520.0',
1631         '69.0.3497.37',
1632         '68.0.3440.111',
1633         '70.0.3519.3',
1634         '70.0.3519.2',
1635         '70.0.3519.1',
1636         '70.0.3519.0',
1637         '69.0.3497.36',
1638         '68.0.3440.110',
1639         '70.0.3518.1',
1640         '70.0.3518.0',
1641         '69.0.3497.35',
1642         '69.0.3497.34',
1643         '68.0.3440.109',
1644         '70.0.3517.1',
1645         '70.0.3517.0',
1646         '69.0.3497.33',
1647         '68.0.3440.108',
1648         '69.0.3497.32',
1649         '70.0.3516.3',
1650         '70.0.3516.2',
1651         '70.0.3516.1',
1652         '70.0.3516.0',
1653         '69.0.3497.31',
1654         '68.0.3440.107',
1655         '70.0.3515.4',
1656         '68.0.3440.106',
1657         '70.0.3515.3',
1658         '70.0.3515.2',
1659         '70.0.3515.1',
1660         '70.0.3515.0',
1661         '69.0.3497.30',
1662         '68.0.3440.105',
1663         '68.0.3440.104',
1664         '70.0.3514.2',
1665         '70.0.3514.1',
1666         '70.0.3514.0',
1667         '69.0.3497.29',
1668         '68.0.3440.103',
1669         '70.0.3513.1',
1670         '70.0.3513.0',
1671         '69.0.3497.28',
1672     )
1673     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# Standard HTTP headers. The dict literal is evaluated once, when this module
# is imported, so random_user_agent() is called a single time and the chosen
# User-Agent stays the same for the lifetime of the process.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1683
1684
# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1688
1689
# Unique sentinel used as the default of `default` parameters; it is compared
# by identity, which lets callers pass None as a genuine default value
NO_DEFAULT = object()
1691
# Full English month names in calendar order (January first)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names in calendar order, keyed by two-letter language code
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1702
# File extensions, written without the leading dot
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# NOTE(review): presumably the extensions accepted as remux targets -
# confirm against the users of this constant
REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
1719
# needed for sanitizing filenames in restricted mode
# Maps each accented character of the first string to the replacement found at
# the same position of the chained sequence. Plain strings contribute one
# replacement per character, while the list items supply the two-letter
# replacements (e.g. 'Æ' -> 'AE', 'ß' -> 'ss').
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1724
# Date/time format strings shared by both lists below
DATE_FORMATS = (
    # Textual month names, with and without ordinal suffixes
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    # Numeric dates starting with the year
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    # "T"-separated date and time
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    # "<date> at <time>"
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Shared formats followed by numeric formats that put the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# Shared formats followed by numeric formats that put the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1782
# Matches `}('<payload>',<int>,<int>,'<words>'.split('|')` and captures the
# four arguments in that order.
# NOTE(review): looks like the tail of "packed" (p,a,c,k,e,d) JavaScript - confirm
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type attribute is application/ld+json
# (case-insensitive, across newlines); the element body is captured in the
# `json_ld` group
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
def preferredencoding():
    """Get preferred encoding.

    Asks locale.getpreferredencoding() for the system encoding and checks
    that it can actually encode text; 'UTF-8' is returned whenever the lookup
    or the trial encoding fails.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Trial run: an unknown or unusable codec name raises here
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
1800
1801
def write_json_file(obj, fn):
    """Encode obj as JSON and write it to fn, atomically if possible.

    The JSON is first written to a temporary file created in fn's directory
    (fn's basename plus '.' as prefix, '.tmp' as suffix) and that file is then
    renamed to fn. If any step fails, the temporary file is removed and the
    exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        # The file must survive close() so that it can be renamed afterwards
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # os.umask() can only be read by setting it, so set it to 0 and
            # immediately restore the previous value
            mask = os.umask(0)
            os.umask(mask)
            # Give the file the permissions a regular open() would have
            # produced under the current umask
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            # Best effort only: keep the temporary file's own permissions
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind on failure
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath[@key] or xpath[@key=val].

        node is the element to search from, xpath the path of the candidate
        elements and key the attribute name (letters, '_' and '-' only).
        When val is None any element carrying the attribute matches,
        otherwise the attribute value must equal val.

        Returns the matching element, or None if there is none.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            return node.find('%s[@%s]' % (xpath, key))

        text = '%s' % (val,)
        # ElementTree predicates have no escape syntax, so pick the quote
        # character that does not occur in the value
        if "'" not in text:
            return node.find("%s[@%s='%s']" % (xpath, key, text))
        if '"' not in text:
            return node.find('%s[@%s="%s"]' % (xpath, key, text))

        # The value contains both kinds of quotes and cannot be written as a
        # predicate at all: compare the attribute values by hand instead
        for candidate in node.findall('%s[@%s]' % (xpath, key)):
            if candidate.attrib.get(key) == text:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath that has attribute key
        (equal to val, unless val is None); return None if there is none."""
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1876
# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
# the namespaces parameter
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand the namespace prefixes of path.

    Every '/'-separated step written as 'prefix:tag' becomes
    '{namespace}tag', with the namespace looked up in ns_map; steps without
    a colon are kept untouched.
    """
    expanded = []
    for step in path.split('/'):
        pieces = step.split(':')
        if len(pieces) == 1:
            expanded.append(pieces[0])
            continue
        prefix, tag = pieces
        expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1891
1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matching xpath.

    xpath is either a single XPath string or an iterable of XPath strings
    that are tried in order until one of them matches.

    When nothing matches: default is returned if one was given; otherwise
    ExtractorError is raised if fatal is set (the message shows name, or
    xpath when name is None); otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty collection of candidates counts
        # as "not found" instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element is handled by xpath_element() and its result (None or
    default) is passed through. An element without text is treated the same
    way: default if given, else ExtractorError if fatal, else None.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    name = xpath if name is None else name
    raise ExtractorError('Could not find XML element\'s text %s' % name)
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When there is no such element: default is returned if one was given;
    otherwise ExtractorError is raised if fatal is set; otherwise None is
    returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    name = '%s[@%s]' % (xpath, key) if name is None else name
    raise ExtractorError('Could not find XML attribute %s' % name)
1942
1943
def get_element_by_id(id, html):
    """Return the content of the first tag whose id attribute equals id in the passed HTML document, or None"""
    return get_element_by_attribute(
        'id', id,
        html)
1947
1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying class_name in the passed HTML document, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value in the passed HTML document, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags carrying class_name in the passed HTML document as a list"""
    # class_name only has to appear as a whole word inside the attribute
    # value; the value is handed over as a ready-made regex, hence
    # escape_value=False
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case it
    is inserted into the pattern as a regular expression. Each returned
    content has its HTML entities unescaped.
    """

    if escape_value:
        value = re.escape(value)

    # <tag ...attributes... attribute=value ...attributes...>content</tag>
    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')
        # Content opening with a quote loses its first and last character
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]
        contents.append(unescapeHTML(content))
    return contents
1990
1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag; empty until a tag
        # has been parsed
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Each start tag replaces the previous result, so after feeding
        # several tags only the attributes of the last one remain
        self.attrs = dict(attrs)
2001
2002
def extract_attributes(html_element):
    """Decode the attributes of an HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the result is
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered before the error is still returned
        pass
    return attr_parser.attrs
2027
2028
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Source newlines become spaces; line breaks only come from the markup
    text = html.replace('\n', ' ')
    substitutions = (
        # <br> (with surrounding whitespace) -> newline
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        # paragraph boundary </p><p ...> -> newline
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # strip all remaining html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    '-' stands for standard output (its binary buffer when there is one).
    Any other name is opened as is; if that fails for a reason other than
    EACCES, the name is passed through sanitize_path() and, provided this
    changed it, opened again. Errors of that second attempt propagate to the
    caller, like those of the standard open() function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            return (open(encodeFilename(filename), open_mode), filename)

        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        if hasattr(sys.stdout, 'buffer'):
            out = sys.stdout.buffer
        else:
            out = sys.stdout
        return (out, filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when timestr cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2085
2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Question marks and control characters are dropped
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        # Restricted mode additionally replaces shell-unfriendly
        # punctuation, whitespace and everything outside ASCII
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores, then trim them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    return result.lstrip('.') or '_'
2126
2127
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On win32 the drive (or UNC) prefix is kept as is and every other path
    component has the characters /<>:"|\\?* as well as a trailing whitespace
    or dot replaced with '#'. On other platforms s is returned unchanged
    unless force is set, in which case the same sanitization is applied and a
    leading path separator of s is preserved.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component in front of the separator that follows
        # the drive
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        # `s and` guards the empty path, for which s[0] would raise IndexError
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2151
2152
def sanitize_url(url):
    """Return url with a missing scheme or a known scheme typo repaired.

    Protocol-relative URLs ('//host/...') get an 'http:' scheme; otherwise
    the first matching entry of a small table of scheme typos is applied.
    URLs that need no repair are returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for pattern, replacement in typo_fixes:
        # Patterns are anchored at the start, so at most one substitution happens
        fixed, hits = re.subn(pattern, replacement, url)
        if hits:
            return fixed
    return url
2169
2170
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for url after passing it through sanitize_url().

    All remaining positional and keyword arguments are forwarded untouched.
    """
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2173
2174
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, environment variables second
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2178
2179
def orderedSet(iterable):
    """Return the distinct elements of iterable as a list, in first-seen order.

    Duplicates are detected with an equality-based membership test against
    the result list, so elements do not have to be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2187
2188
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity text without the leading '&' but
    including the trailing ';' (e.g. 'amp;' or '#x41;'). Named entities are
    looked up in the HTML 4 table and then in the HTML5 table; numeric
    references are decoded as decimal or hexadecimal code points. Anything
    that cannot be decoded is returned in its literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() accepts the '0x' prefix when base is 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # ValueError: code point outside the Unicode range;
            # OverflowError: number too large to be passed to chr() at all
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2218
2219
def unescapeHTML(s):
    """Replace HTML entities in s with the characters they stand for.

    None is passed through unchanged; any other value must be a text string.
    Each '&...;' sequence is handed to _htmlentity_transform().
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # Group 1 is the entity without '&' but with the closing ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
2227
2228
def process_communicate_or_kill(p, *args, **kwargs):
    """Run p.communicate(*args, **kwargs) and return its result.

    If communicate() is interrupted by any exception, the process is killed
    and reaped before the exception is re-raised.
    """
    try:
        output = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return output
2236
2237
def get_subprocess_encoding():
    """Return the name of the encoding used for subprocess arguments.

    On Windows (major version 5 or later) this is preferredencoding();
    elsewhere it is the filesystem encoding. 'utf-8' is the fallback when
    no encoding is reported.
    """
    # For subprocess calls on Windows, encode with locale encoding
    # Refer to http://stackoverflow.com/a/9951851/35070
    use_locale_encoding = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    encoding = preferredencoding() if use_locale_encoding else sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2248
2249
def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form expected by the platform's file/process APIs.

    @param s              The name of the file (must be a text string)
    @param for_subprocess Set when the result is passed to a subprocess;
                          this disables the Windows Unicode shortcut
    """

    assert type(s) == compat_str

    takes_unicode = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if takes_unicode:
        return s

    # Characters the target encoding cannot represent are dropped
    return s.encode(get_subprocess_encoding(), 'ignore')
2272
2273
def decodeFilename(b, for_subprocess=False):
    """Return b as text, decoding byte strings on Python 2 only.

    On Python 3, and for values that are not byte strings, b is returned
    unchanged. for_subprocess is accepted but not used by this function.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    # Bytes that are invalid in the subprocess encoding are dropped
    return b.decode(get_subprocess_encoding(), 'ignore')
2283
2284
def encodeArgument(s):
    """Encode a command-line argument via encodeFilename(..., True).

    Values that are not text strings are first decoded as ASCII.
    """
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
2292
2293
def decodeArgument(b):
    """Decode a command-line argument; counterpart of encodeArgument()."""
    return decodeFilename(b, for_subprocess=True)
2296
2297
def decodeOption(optval):
    """Return an option value as a text string.

    None is passed through; byte strings are decoded with
    preferredencoding(). The result is asserted to be a text string.
    """
    if optval is None:
        return None
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
2306
2307
def formatSeconds(secs, delim=':'):
    """Format a duration given in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used and delim separates the fields.
    The thresholds are inclusive, so exactly one hour is rendered as
    '1:00:00' (not '60:00') and exactly one minute as '1:00' (not '60').
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2315
2316
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler configured from params.

    Certificate verification is disabled when params['nocheckcertificate']
    is true. Extra keyword arguments are forwarded to the handler. The way
    the SSL context is built depends on what the running interpreter offers.
    """
    skip_verification = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=ctx, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    ctx.verify_mode = ssl.CERT_NONE if skip_verification else ssl.CERT_REQUIRED
    ctx.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=ctx, **kwargs)
2340
2341
def bug_reports_message():
    """Return the text appended to unexpected errors asking for a bug report.

    The update hint depends on ytdl_is_updateable().
    """
    update_cmd = (
        'type  yt-dlp -U  to update' if ytdl_is_updateable()
        else 'see  https://github.com/yt-dlp/yt-dlp  on how to update')
    return ''.join((
        '; please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))
2351
2352
class YoutubeDLError(Exception):
    """Common base class of the YoutubeDL exceptions defined in this module."""
2356
2357
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        cause, if given, is appended to the message; video_id, if given, is prepended to it.
        """

        # Network errors and unavailable videos are never reported as bugs
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as a string, or None if there is none."""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
2385
2386
class UnsupportedError(ExtractorError):
    """Expected extraction error for an unsupported URL; the URL is kept in .url."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2392
2393
class RegexNotFoundError(ExtractorError):
    """Extraction error for a regular expression that did not match."""
2397
2398
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always created as an expected error; the original message and the
    optional countries value are kept as attributes.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg, self.countries = msg, countries
2410
2411
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may raise this when they are not configured to
    continue on errors. The exception carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that the original error can still be inspected later
        self.exc_info = exc_info
2424
2425
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
2433
2434
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this to indicate an error in
    the postprocessing task. The message is also available as .msg.
    """

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2445
2446
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): the previous docstring was copied from MaxDownloadsReached;
    presumably this is the --break-on-existing stop signal - confirm against
    the code that raises it.
    """
2450
2451
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): the previous docstring was copied from MaxDownloadsReached;
    presumably this is the --break-on-reject stop signal - confirm against
    the code that raises it.
    """
2455
2456
class MaxDownloadsReached(YoutubeDLError):
    """ Signals that the --max-downloads limit has been reached. """
2460
2461
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
2469
2470
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    FileDownloader objects may raise this when a downloaded file is smaller
    than what the server announced first, indicating the connection was
    probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
2486
2487
class XAttrMetadataError(YoutubeDLError):
    """Error while writing extended-attribute metadata.

    Besides .code and .msg, a coarse .reason is derived from them:
    'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Classify the failure from the errno value or, failing that, the message text
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg or 'Disk quota exceeded' in msg)
        if out_of_space:
            self.reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2502
2503
class XAttrUnavailableError(YoutubeDLError):
    """Extended attributes are unavailable.

    NOTE(review): meaning inferred from the class name only - confirm
    against the code that raises it.
    """
2506
2507
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if configured, bind it to a source address.

    ydl_handler is the handler whose _params dict is consulted for
    'source_address'; http_class is the connection class to instantiate with
    *args/**kwargs; is_https selects TLS wrapping in the Python 2.6 fallback.
    When a source address is set, outgoing connections are bound to it and
    only remote addresses of the same IP family are tried.
    Returns the connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the source_address parameter shadows the outer variable; here it is
            # the (host, port) tuple, so source_address[0] is the address string
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address means IPv4, anything else is treated as IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                # The host resolved, but to no address of the required family
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the first successful connection wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and release the socket before trying the next address
                    err = _
                    if sock is not None:
                        sock.close()
            # Every candidate failed (or there was none): report the last error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        # Only override the connection factory on classes that have that hook
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 in the (address, port) tuple handed to bind()
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() on this instance
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2571
2572
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to headers.

    Without the marker the very same headers object is returned. With it, a
    new dict is returned that lacks both the marker and any Accept-Encoding
    header (matched case-insensitively); the input is left unmodified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    remaining = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del remaining['Youtubedl-no-compression']
    return remaining
2581
2582
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store params (read later as self._params); other arguments go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, routing through a SOCKS proxy when 'Ytdl-socks-proxy' is set."""
        conn_class = compat_http_client.HTTPConnection

        # The internal proxy marker header is consumed here and never sent
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and zlib-wrapped second."""
        if not data:
            return data
        try:
            # Negative wbits: raw deflate stream without zlib header
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Prepare req: escape its URL, add std_headers and drop internal headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Standard headers never override headers already present on the request
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Return resp with gzip/deflate bodies decoded and redirect Locations escaped."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes removed; the first success wins
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: surface the original failure
                    raise original_ioerror
            # Rebuild the response around the decoded body, reusing the original headers object
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    # Replace the header only when escaping changed something
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets the same request/response processing
    https_request = http_request
    https_response = http_response
2706
2707
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of base_class that connects through a SOCKS proxy.

    base_class must be an HTTPConnection or HTTPSConnection class.
    socks_proxy is the proxy URL; its scheme selects the protocol
    ('socks5', 'socks4a', 'socks4' or 'socks', the latter meaning SOCKS4),
    the port defaults to 1080 and user name/password are URL-unquoted.

    Raises ValueError for any other scheme (previously this surfaced as an
    UnboundLocalError further down).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Credentials may be absent (None) or empty; leave those untouched
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            # Replace the plain TCP connection with one made through the proxy
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # HTTPS connections still need TLS on top of the proxied socket
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2749
2750
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection().

    params is kept as self._params; https_conn_class optionally overrides
    the connection class (HTTPSConnection by default).
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req, routing through a SOCKS proxy when 'Ytdl-socks-proxy' is set."""
        conn_class = self._https_conn_class

        # Forward whichever TLS settings this interpreter's HTTPSHandler carries:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        open_kwargs = {}
        for attr, key in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                open_kwargs[key] = getattr(self, attr)

        # The internal proxy marker header is consumed here and never sent
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2774
2775
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar variant that reads and writes cookie files as UTF-8,
    skips malformed entries on load and stores session cookies with an
    `expires` field of 0.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Line prefix stripped from entries before they are parsed
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie line
    _ENTRY_LEN = 7
    # Written at the top of every saved cookie file
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    # Names for the fields of one cookie line, in file order
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        filename defaults to self.filename; ValueError is raised when neither
        is set. Cookies marked discard are skipped unless ignore_discard is
        set, expired cookies unless ignore_expires is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        # NOTE: this modifies the cookie objects held by the jar, not just the output
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                # One tab-separated line per cookie, fields in _CookieFileEntry order
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised when neither
        is set. Malformed entries are skipped with a warning on stderr.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line to feed to the parser, or raises LoadError if it is malformed
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Build a cleaned in-memory copy of the file for the parent class parser
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2892
2893
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same handling to HTTP and HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Delegate to HTTPCookieProcessor.http_response (workaround below is disabled)."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base_processor = compat_urllib_request.HTTPCookieProcessor
        return base_processor.http_response(self, request, response)

    # HTTPS requests and responses are processed exactly like HTTP ones
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2916
2917
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that coerces the redirect URL to unicode on Python 2.

    On Python 3 nothing is overridden.
    """
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2925
2926
def extract_timezone(date_str):
    """Split a trailing UTC offset off a date string.

    Returns a ``(timedelta, remaining_string)`` tuple. A trailing ``Z`` or
    no recognizable offset yields a zero timedelta; when nothing matches
    the string is returned unchanged.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(tz_match.group('tz'))]
    if not tz_match.group('sign'):
        # Plain "Z" suffix: UTC
        return datetime.timedelta(), remainder

    direction = 1 if tz_match.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, remainder
2943
2944
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    Fractional seconds are discarded. When no timezone (a timedelta) is
    given, it is extracted from the string itself. Returns None for None
    input or when the string does not match the expected layout.
    """
    if date_str is None:
        return None

    trimmed = re.sub(r'\.[0-9]+', '', date_str)
    if timezone is None:
        timezone, trimmed = extract_timezone(trimmed)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        moment = datetime.datetime.strptime(trimmed, layout) - timezone
        return calendar.timegm(moment.timetuple())
    except ValueError:
        return None
2962
2963
def date_formats(day_first=True):
    """Return the strptime format list matching the requested day/month order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2966
2967
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Commas become spaces, then AM/PM (+ optional timezone name) is removed
    cleaned = re.sub(
        r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str.replace(',', ' '))
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; the last one that parses wins
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to RFC 2822 style parsing
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
2994
2995
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a loosely formatted date string.

    Returns None for None input or when no known format matches.
    """
    if date_str is None:
        return None

    text = re.sub(r'[,|]', '', date_str)

    # PM is detected before the marker gets stripped below
    pm_delta = 12 if re.search(r'(?i)PM', text) else 0
    timezone, text = extract_timezone(text)

    # Remove AM/PM + timezone
    text = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', text)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_suffix = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', text)
    if tz_suffix:
        text = text[:-len(tz_suffix.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    nanos = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', text)
    if nanos:
        text = nanos.group(1)

    pm_shift = datetime.timedelta(hours=pm_delta)
    for fmt in date_formats(day_first):
        try:
            moment = datetime.datetime.strptime(text, fmt) - timezone + pm_shift
            return calendar.timegm(moment.timetuple())
        except ValueError:
            continue

    fallback = email.utils.parsedate_tz(text)
    if fallback:
        return calendar.timegm(fallback) + pm_delta * 3600
    return None
3027
3028
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL, falling back to default_ext."""
    if url is None or '.' not in url:
        return default_ext

    # Text after the last dot of the part preceding the query string
    candidate = url.split('?', 1)[0].rsplit('.', 1)[-1]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3040
3041
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by swapping the extension for '<lang>.<format>'."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
3044
3045
def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')
    # A bad approximation? Months and years are converted to fixed day counts
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
3073
3074
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        # Not in the expected layout; hand it back untouched
        return date_str
    return '-'.join(parts.groups())
3083
3084
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing bound defaults to the earliest/latest representable date.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
3114
3115
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3124
3125
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The string is written with the Win32 WriteConsoleW call when `out` is
    file descriptor 1 or 2 and the matching handle is a real console;
    in every other case nothing is written and False is returned."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps a file descriptor to the id passed to GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE in the Win32 API)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is invalid, is not a character device
        # (ignoring the REMOTE bit), or GetConsoleMode fails on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, each chunk stopping before
    # the next non-BMP character; a leading non-BMP character is written on
    # its own with a length of 2
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Only drop what was actually written; the rest is retried
            s = s[written.value:]
    return True
3199
3200
def write_string(s, out=None, encoding=None):
    """Write the text `s` to `out` (sys.stderr by default) and flush it.

    On win32, when no explicit encoding is requested, the console API is
    tried first. Otherwise the text is encoded (ignoring unencodable
    characters) for byte-oriented streams, or written as-is.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    try_console = sys.platform == 'win32' and encoding is None and hasattr(stream, 'fileno')
    if try_console and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        codec = encoding or getattr(stream, 'encoding', None) or preferredencoding()
        stream.buffer.write(s.encode(codec, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
3221
3222
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values."""
    if not bs:
        return []
    if not isinstance(bs[0], int):
        # Items are 1-character strings (e.g. Python 2 str)
        return [ord(ch) for ch in bs]
    return list(bs)  # Python 3
3230
3231
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values into a byte string."""
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
3236
3237
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on win32, fcntl.flock elsewhere, and
# stubs that raise IOError where fcntl cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file `f`; raises OSError when LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file can pass the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 (LOCKFILE_EXCLUSIVE_LOCK in the Win32 API) for exclusive locks
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Unlock a file locked by _lock_file; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock on `f`."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock held on `f`."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3311
3312
class locked_file(object):
    """Context manager wrapping an open file that is locked while in use.

    Mode 'r' takes a shared lock, 'a' and 'w' an exclusive one. The file
    is opened on construction and closed when the context exits (or when
    locking fails with IOError).
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the handle when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3342
3343
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to 'utf-8' when unknown."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3347
3348
def shell_quote(args):
    """Join arguments into one shell-quoted command line string."""
    fs_encoding = get_filesystem_encoding()
    # We may get a filename encoded with 'encodeFilename', hence the decoding
    return ' '.join(
        compat_shlex_quote(arg.decode(fs_encoding) if isinstance(arg, bytes) else arg)
        for arg in args)
3358
3359
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    `data` is JSON-encoded into the URL fragment under the
    '__youtubedl_smuggle' key. Data already smuggled in `url` is merged in
    and takes precedence over `data` for identical keys. The caller's
    `data` mapping is left unmodified.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so the dict passed in by the caller is not mutated
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
3368
3369
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (plain_url, data).

    URLs carrying no smuggled data are returned unchanged with `default`.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    # The marker guarantees a '#', so this always yields two parts
    plain_url, fragment = smug_url.rsplit('#', 1)
    encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return plain_url, json.loads(encoded)
3377
3378
def format_bytes(bytes):
    """Format a byte count as a human readable string with a binary suffix.

    Accepts a number, a numeric string, or None (rendered as 'N/A').
    The unit is clamped to the B..YiB range, so values below one byte are
    shown in B and values beyond the YiB range are shown in YiB.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the index inside the suffix table: a negative exponent
        # (value < 1) would otherwise wrap around to the largest units and
        # a huge value would raise IndexError
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3391
3392
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of `s` using a unit->multiplier table.

    A comma in the number is treated as the decimal separator. Returns the
    scaled value truncated to int, or None when `s` does not match.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if found is None:
        return None
    number_text = found.group('num').replace(',', '.')
    multiplier = unit_table[found.group('unit')]
    return int(float(number_text) * multiplier)
3402
3403
def parse_filesize(s):
    """Parse a human readable file size such as '5 MiB' into a byte count.

    Returns None for None input or when the string is not recognized.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    size_units = {
        'B': 1, 'b': 1, 'bytes': 1,
        'KiB': 1024, 'KB': 1000, 'kB': 1024, 'Kb': 1000, 'kb': 1000,
        'kilobytes': 1000, 'kibibytes': 1024,
        'MiB': 1024 ** 2, 'MB': 1000 ** 2, 'mB': 1024 ** 2, 'Mb': 1000 ** 2, 'mb': 1000 ** 2,
        'megabytes': 1000 ** 2, 'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3, 'GB': 1000 ** 3, 'gB': 1024 ** 3, 'Gb': 1000 ** 3, 'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3, 'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4, 'TB': 1000 ** 4, 'tB': 1024 ** 4, 'Tb': 1000 ** 4, 'tb': 1000 ** 4,
        'terabytes': 1000 ** 4, 'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5, 'PB': 1000 ** 5, 'pB': 1024 ** 5, 'Pb': 1000 ** 5, 'pb': 1000 ** 5,
        'petabytes': 1000 ** 5, 'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6, 'EB': 1000 ** 6, 'eB': 1024 ** 6, 'Eb': 1000 ** 6, 'eb': 1000 ** 6,
        'exabytes': 1000 ** 6, 'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7, 'ZB': 1000 ** 7, 'zB': 1024 ** 7, 'Zb': 1000 ** 7, 'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7, 'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8, 'YB': 1000 ** 8, 'yB': 1024 ** 8, 'Yb': 1000 ** 8, 'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8, 'yobibytes': 1024 ** 8,
    }
    return lookup_unit_table(size_units, s)
3473
3474
def parse_count(s):
    """Parse a count such as '1,234' or '1.2M' into an integer.

    Returns None for None input or when the string is not recognized.
    """
    if s is None:
        return None

    text = s.strip()
    # Plain digits with separators need no unit lookup
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {
        'k': 1000, 'K': 1000,
        'm': 1000 ** 2, 'M': 1000 ** 2,
        'kk': 1000 ** 2, 'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, text)
3494
3495
def parse_resolution(s):
    """Extract width/height information from a string.

    Recognizes 'WxH', '<height>p'/'<height>i' and '4k'/'8k' in that order.
    Returns a dict with 'width' and/or 'height', or an empty dict.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        width, height = dimensions.group('w'), dimensions.group('h')
        return {'width': int(width), 'height': int(height)}

    scan_lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    k_label = re.search(r'\b([48])[kK]\b', s)
    if k_label:
        return {'height': int(k_label.group(1)) * 540}

    return {}
3516
3517
def parse_bitrate(s):
    """Return the integer found before 'kbps' in `s`, or None."""
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3524
3525
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name

    Names for other languages present in MONTH_NAMES are selected via
    `lang`; unknown languages fall back to English. Returns None when the
    name is not found.
    """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3535
3536
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1
3545
3546
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML

    Ampersands that already start a predefined or numeric entity are kept.
    """
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3553
3554
def setproctitle(title):
    """Set the process name through libc's prctl, where available.

    Silently does nothing on Jython, when 'libc.so.6' cannot be loaded, or
    when the loaded libc has no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes allocates one extra byte for
    # the NUL terminator; a buffer of exactly len(title_bytes) would leave
    # the name unterminated when handed to prctl
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME (see linux/prctl.h)
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3579
3580
def remove_start(s, start):
    """Return `s` without the prefix `start`; `s` is returned as-is otherwise."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3583
3584
def remove_end(s, end):
    """Return `s` without the suffix `end`; `s` is returned as-is otherwise.

    None is passed through, and an empty `end` leaves `s` untouched.
    """
    # The emptiness check matters: s[:-0] is s[:0], i.e. an empty string
    if s is not None and s.endswith(end) and end:
        return s[:-len(end)]
    return s
3587
3588
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding `s`."""
    if s is None or len(s) < 2:
        return s
    # Both ends must carry the same quote character
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3596
3597
def get_domain(url):
    """Return the domain part of a URL (scheme and 'www.' dropped), or None."""
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3601
3602
def url_basename(url):
    """Return the last segment of the URL's path."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
3606
3607
def base_url(url):
    """Return the http(s) URL up to and including the last '/' of its path.

    Raises AttributeError when `url` does not match.
    """
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3610
3611
def urljoin(base, path):
    """Join `path` onto `base`, returning None for unusable input.

    Byte strings are decoded as UTF-8. A `path` that already is an absolute
    or protocol-relative URL is returned unchanged; otherwise `base` must
    be an http(s) or protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if isinstance(base, compat_str) and re.match(r'^(?:https?:)?//', base):
        return compat_urlparse.urljoin(base, path)
    return None
3625
3626
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
3630
3631
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
3635
3636
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert `v` to an int, returning `default` when that is not possible.

    If `get_attr` is given, the attribute of that name is read from `v`
    first. None and the empty string yield `default`. The result is
    ``int(v) * invscale // scale``.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
3649
3650
def str_or_none(v, default=None):
    """Return `v` converted to compat_str, or `default` when `v` is None."""
    if v is None:
        return default
    return compat_str(v)
3653
3654
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned unchanged; strings have ',', '.' and '+' removed
    before conversion. Any other type yields None.
    """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        return int_or_none(re.sub(r'[,\.\+]', '', int_str))
    return None
3662
3663
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert `v` to a float scaled by invscale / scale, or return `default`."""
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
3671
3672
def bool_or_none(v, default=None):
    """Return `v` if it is a bool, otherwise `default`."""
    if isinstance(v, bool):
        return v
    return default
3675
3676
def strip_or_none(v, default=None):
    """Return `v` stripped of surrounding whitespace if it is a string, else `default`."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3679
3680
def url_or_none(url):
    """Return the stripped `url` if it looks like a supported URL, else None.

    Accepted are protocol-relative URLs and the schemes listed in the
    pattern below.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
3686
3687
def strftime_or_none(timestamp, date_format, default=None):
    """Format a UNIX timestamp or a YYYYMMDD string with `date_format`.

    Returns `default` for unsupported types or when parsing/formatting
    raises ValueError, TypeError or AttributeError.
    """
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            moment = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            moment = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            return default
        return moment.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3698
3699
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three layouts are tried in order: a clock style '[[[D:]H:]M:]S[.ms]',
    an ISO 8601 / worded style such as 'PT1H30M' or '3 hours 2 mins', and
    a fractional '<n> hours' / '<n> minutes' form. Returns None for
    non-string input or when nothing matches.
    """
    if not isinstance(s, compat_basestring):
        return None

    text = s.strip()

    days = hours = mins = secs = ms = None
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', text)
    if m is None:
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', text)
    if m is not None:
        # Both layouts above expose the same five groups in the same order
        days, hours, mins, secs, ms = m.groups()
    else:
        m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', text)
        if m is None:
            return None
        hours, mins = m.groups()

    total = 0
    if secs:
        total += float(secs)
    if mins:
        total += float(mins) * 60
    if hours:
        total += float(hours) * 60 * 60
    if days:
        total += float(days) * 24 * 60 * 60
    if ms:
        # ms holds the fractional part including the leading dot
        total += float(ms)
    return total
3756
3757
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` in front of the filename's real extension.

    When `expected_real_ext` is given and does not match the real
    extension, `ext` is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(stem, ext, real_ext)
3764
3765
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the filename's extension with `ext`.

    When `expected_real_ext` is given and does not match the real
    extension, `ext` is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        base = filename
    else:
        base = stem
    return '{0}.{1}'.format(base, ext)
3771
3772
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)

    Returns False when the binary cannot be started. """
    command = [exe] + args
    try:
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3782
3783
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3801
3802
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version from program output.

    Returns the first group of `version_re` (a generic 'version X' pattern
    by default), or `unrecognized` when the pattern does not match.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3812
3813
class PagedList(object):
    """Base class for lists whose entries are fetched page by page.

    Subclasses implement getslice(start=0, end=None), returning the entries
    with indices in [start, end) as a list.
    """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())


class OnDemandPagedList(PagedList):
    """Paged list with an unknown number of pages.

    pagefunc(pagenum) returns an iterable with the entries of the given
    (0-based) page; a page with fewer than pagesize entries is taken to be
    the last one. With use_cache, fetched pages are kept and reused.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        res = []
        # An empty or inverted range has no entries. Without this guard the
        # end offset computed below wraps around to a full page (e.g. for
        # start == end == 0) and entries would be returned anyway.
        if end is not None and end <= start:
            return res

        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
                if self._use_cache:
                    self._cache[pagenum] = page_results

            # Offset of the first wanted entry within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry, if the range ends here
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res


class InAdvancePagedList(PagedList):
    """Paged list whose number of pages (pagecount) is known beforehand.

    pagefunc(pagenum) returns an iterable with the entries of the given
    (0-based) page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        # An empty or inverted range has no entries; a negative entry count
        # would otherwise be used as a slice bound below.
        if end is not None and end <= start:
            return res

        start_page = start // self._pagesize
        # Never ask for pages beyond the known page count
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page starts in the middle
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    res.extend(page[:only_more])
                    break
            res.extend(page)
        return res
3899
3900
def uppercase_escape(s):
    """Replace every \\UXXXXXXXX escape sequence in s with the character it denotes"""
    decode_escape = codecs.getdecoder('unicode_escape')

    def _decode(mobj):
        decoded, _ = decode_escape(mobj.group(0))
        return decoded

    return re.sub(r'\\U[0-9a-fA-F]{8}', _decode, s)
3907
3908
def lowercase_escape(s):
    """Replace every \\uXXXX escape sequence in s with the character it denotes"""
    decode_escape = codecs.getdecoder('unicode_escape')

    def _decode(mobj):
        decoded, _ = decode_escape(mobj.group(0))
        return decoded

    return re.sub(r'\\u[0-9a-fA-F]{4}', _decode, s)
3915
3916
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    is_py2_text = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if is_py2_text:
        # quote() on Python 2 is fed the UTF-8 encoded form
        s = s.encode('utf-8')
    # The second argument lists the characters that are left unquoted
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3922
3923
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host part is IDNA-encoded, all other components are percent-escaped
    ascii_netloc = parts.netloc.encode('idna').decode('ascii')
    escaped = parts._replace(
        netloc=ascii_netloc,
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
3934
3935
def read_batch_urls(batch_fd):
    """Read the URLs listed in batch_fd, one per line, and close it.

    Blank lines and lines starting with '#', ';' or ']' are skipped.
    """
    def fixup(line):
        if not isinstance(line, compat_str):
            line = line.decode('utf-8', 'replace')
        # Drop a leading byte order mark, in either of the forms it may take
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if line.startswith(bom):
                line = line[len(bom):]
        line = line.lstrip()
        if not line:
            return False
        if line.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        head = re.split(r'\s#', line, 1)[0]
        return head.rstrip()

    urls = []
    with contextlib.closing(batch_fd) as fd:
        for raw_line in fd:
            cleaned = fixup(raw_line)
            if cleaned:
                urls.append(cleaned)
    return urls
3953
3954
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes"""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3957
3958
def update_url_query(url, query):
    """Return url with the parameters in query added to (or overriding
    those of) its query string; url is returned as is if query is empty"""
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parts.query)
    merged.update(query)
    # doseq=True: the parsed query maps each key to a list of values
    new_query = compat_urllib_parse_urlencode(merged, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
3967
3968
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with URL, data, headers and query
    parameters overridden or extended by the given arguments.

    HEAD and PUT requests keep their request class; the timeout attribute
    is carried over if req has one.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    body = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)

    method = req.get_method()
    if method == 'HEAD':
        request_class = HEADRequest
    elif method == 'PUT':
        request_class = PUTRequest
    else:
        request_class = compat_urllib_request.Request

    updated = request_class(
        target_url, data=body, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        updated.timeout = req.timeout
    return updated
3987
3988
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns (body bytes, Content-Type header value). Raises ValueError if
    the boundary occurs inside any of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes + b'\r\n'

    chunks = []
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        chunks.append(delimiter)
        chunks.append(part)

    # Closing delimiter
    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
4009
4010
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary and a
        ValueError is raised when it clashes with the data. Otherwise
        random boundaries are tried until one does not clash.

    Returns a tuple (encoded body, Content-Type header value).

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # Caller-chosen boundary: a clash with the data is the caller's error
        out, content_type = _multipart_encode_impl(data, boundary)
        return out, content_type

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            out, content_type = _multipart_encode_impl(data, candidate)
        except ValueError:
            # Boundary occurs in the data; try another one
            continue
        return out, content_type
4039
4040
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up a key, or the first usable one of several keys, in d.

    For a list/tuple of keys, keys that are missing or map to None are
    skipped, as are falsy values unless skip_false_values is False;
    default is returned if no key qualifies. A single key is a plain
    d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for candidate in key_or_keys:
        if candidate not in d:
            continue
        value = d[candidate]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4049
4050
def try_get(src, getter, expected_type=None):
    """Apply getter (or each of a list/tuple of getters, in order) to src.

    Returns the first result obtained without AttributeError, KeyError,
    TypeError or IndexError that is an instance of expected_type (any
    result if expected_type is None); None if there is no such result.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fetch in getters:
        try:
            value = fetch(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
4062
4063
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to the earlier ones.

    None values are ignored. A key that is already set is only overridden
    when it holds an empty string and the new value is a non-empty string.
    """
    merged = {}
    for source in dicts:
        for key, value in source.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_empty_string = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str) and not current)
                if not fills_empty_string:
                    continue
            merged[key] = value
    return merged
4076
4077
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding if it is not one already"""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4080
4081
# US rating labels mapped to the age limit that parse_age_limit() returns
# for a string equal to the label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4089
4090
# "TV-..." rating labels mapped to the age limit that parse_age_limit()
# returns for them. parse_age_limit() builds its pattern from the part of
# each key after the "TV-" prefix, so every key must start with "TV-"
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4099
4100
def parse_age_limit(s):
    """Convert an age rating to an age limit.

    Accepts an int in the range 0..21, a string of one or two digits with
    an optional trailing '+', a key of US_RATINGS, or a TV rating written
    as 'TV-XX', 'TV_XX' or 'TVXX'. Returns None for anything else.
    """
    if type(s) == int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    age_match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if age_match:
        return int(age_match.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_labels = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_match = re.match(r'^TV[_-]?(%s)$' % tv_labels, s)
    if tv_match:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_match.group(1)]
    return None
4115
4116
def strip_jsonp(code):
    """Strip a JSONP wrapper, returning only what is between the
    parentheses of the callback call; code that does not look like a
    callback call is returned unchanged"""
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
4125
4126
def js_to_json(code, vars={}):
    """Rewrite the JavaScript tokens found in code into their JSON form.

    Single- and double-quoted strings are re-quoted with double quotes,
    comments, runs of '!' and commas directly before a closing ']' or '}'
    are removed, hexadecimal and octal integers are written in decimal,
    identifiers found in vars are replaced by the corresponding value and
    any other matched identifier or integer key is wrapped in double
    quotes. Text not matched by the tokenizer pattern is left untouched.
    """
    # vars is a dict of var, val pairs to substitute
    # Matches a /* ... */ block comment or a // line comment
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    # Optional whitespace around at most one comment
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for integer literals that need conversion;
    # each pattern also accepts a trailing ':' (the literal used as a key)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Returns the JSON replacement for a single matched token
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!' runs and the matched (trailing) commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: strip the quotes and fix up the escapes so that
            # the content is valid inside a double-quoted JSON string
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Keys must be strings in JSON, so a literal followed by
                    # ':' is emitted quoted
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4171
4172
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values:
    the returned function maps an id to its index in quality_ids, or to -1
    if it is not listed """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
4181
4182
# Default output filename template, stored under the 'default' key
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
}
# Further output template types.
# NOTE(review): the values look like the filename suffix that goes with each
# type (None where there is none) - confirm against the code that reads this
OUTTMPL_TYPES = {
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4195
4196
def limit_length(s, length):
    """ Cut strings longer than length down to length characters, the last
    three of which are an ellipsis; None is passed through """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
4205
4206
def version_tuple(v):
    """Split a version string at '.' and '-' and return its parts as a tuple of ints"""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
4209
4210
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit.

    If version is empty, or either version cannot be parsed, the answer is
    the opposite of assume_new.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated
4218
4219
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Hard-wired to False: the unconditional return below makes the rest of
    # this function (the zip/frozen detection) unreachable
    return False

    from zipimport import zipimporter

    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4227
4228
def args_to_str(args):
    """Return args as a single string of shell-quoted, space-separated
    arguments (a short representation of a subprocess command)"""
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
4232
4233
def error_to_compat_str(err):
    """Return the message of err as a text string"""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4241
4242
def mimetype2ext(mt):
    """Return the file extension that goes with the MIME type mt.

    Parameters (anything from the first ';' on) and letter case are
    ignored. Known types are mapped to their usual extension; for all
    others the subtype itself is returned. None is passed through.
    """
    if mt is None:
        return None

    # Type and subtype are matched case-insensitively and parameters are
    # irrelevant here. Normalizing before either lookup makes e.g.
    # 'audio/mp4; codecs="mp4a.40.2"' hit the full-type table, and keeps a
    # '/' inside a parameter value from being taken for the type separator.
    mt = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mt)
    if ext is not None:
        return ext

    _, _, res = mt.rpartition('/')

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4280
4281
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video and audio codec.

    Returns a dict with 'vcodec' and 'acodec' ('none' for whichever is not
    present), or an empty dict if nothing can be told. A warning is written
    to stderr for every codec that is not recognized.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in VIDEO_CODECS:
            # The first codec of each kind wins
            vcodec = vcodec or full_codec
        elif codec in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        # Nothing recognized: assume the order is video, audio
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4311
4312
def urlhandle_detect_ext(url_handle):
    """Guess the file extension from the response headers of url_handle:
    from the attachment filename in Content-Disposition if that yields an
    extension, otherwise from Content-Type"""
    getheader = url_handle.headers.get

    disposition = getheader('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = determine_ext(mobj.group('filename'), default_ext=None) if mobj else None
        if ext:
            return ext

    return mimetype2ext(getheader('Content-Type'))
4325
4326
def encode_data_uri(data, mime_type):
    """Return a base64 "data:" URI holding data (bytes) with the given MIME type"""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4329
4330
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # Nothing is blocked if no limit is set or the content is for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4339
4340
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Returns a match object (truthy) if, after an optional byte order mark
    and whitespace, the data starts with '<'; None otherwise. """

    # Longer marks come before the shorter ones they start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a byte order mark the data is taken to be UTF-8
    encoding, payload = 'utf-8', first_bytes
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, payload = enc, first_bytes[len(bom):]
            break
    text = payload.decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
4359
4360
def determine_protocol(info_dict):
    """Return the protocol of info_dict: its 'protocol' entry if set,
    otherwise a guess based on the start, the extension and finally the
    scheme of info_dict['url']"""
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4381
4382
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values, as left-aligned
    text columns below header_row.

    delim adds a row of dashes below the header, extraGap widens the gap
    between columns and hideEmpty drops the columns that are empty in
    every data row. """

    def column_widths(rows):
        widths = []
        for column in zip(*rows):
            widths.append(max(len(compat_str(cell)) for cell in column))
        return widths

    def keep_columns(row, keep_flags):
        return [cell for (keep, cell) in zip(keep_flags, row) if keep]

    if hideEmpty:
        # A column whose widest data cell has length 0 is empty
        data_widths = column_widths(data)
        header_row = keep_columns(header_row, data_widths)
        data = [keep_columns(row, data_widths) for row in data]

    widths = column_widths([header_row] + data)
    rows = [header_row]
    if delim:
        rows.append(['-' * width for width in widths])
    rows.extend(data)

    # All columns but the last are padded to their width
    padded = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded) + ' %s'
    return '\n'.join(row_format % tuple(row) for row in rows)
4403
4404
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against dct.

    filter_part is either a comparison "key OP value" (OP being one of
    <, <=, >, >=, =, !=, optionally followed by "?") or a unary test
    "key" / "!key". Returns the (truthy or falsy) outcome of the test.
    Raises ValueError if filter_part has neither form, if an ordering
    operator is used with a string value or if a numeric value cannot be
    parsed.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # The value is a number with optional size suffix, a quoted string or
    # a bare word that does not start with a digit or a dot
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            # String comparison: only equality and inequality are allowed
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: a plain integer, else a file size, tried
            # as written first and then with a 'B' appended
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: the result is the text matched for the "?"
            # after the operator (truthy) or None if there was no "?"
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary tests: for a bool field "key" / "!key" tests for True / False,
    # for any other field they test whether it is set / not set
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4473
4474
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax: filter_str is a
    list of filter parts joined by '&', all of which must match.
    Returns True (=passes filter) or false """

    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4480
4481
def match_filter_func(filter_str):
    """Return a function that checks an info dict against filter_str: it
    returns None if the dict passes, else a message telling it is skipped"""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4490
4491
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP time expression to seconds (float).

    Understands a number of seconds with optional 's' suffix and clock
    times of the form H:MM:SS with an optional fraction after '.' or ':'.
    Returns None for empty or any other input.
    """
    if not time_expr:
        return None

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match:
        hours, minutes, seconds = clock_match.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
    return None
4503
4504
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode (HH:MM:SS,mmm).

    The time is rounded to whole milliseconds first. Formatting the float
    parts with %d would truncate instead, so that values which have no
    exact binary representation come out one millisecond short (2.3 would
    be rendered as 00:00:02,299).
    """
    total_msec = int(round(seconds * 1000))
    total_secs, msec = divmod(total_msec, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
4507
4508
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT. Each <p> element that has a begin
    time and either an end time or a duration becomes one SRT entry;
    supported styling is rendered as <b>, <i>, <u> and <font> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Current namespace -> legacy namespaces that are rewritten to it
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # Style id -> resolved (inherited + own) supported properties
    styles = {}
    # Properties taken from the styles of <body> and <div>
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that renders the content of one <p> as SRT text.
        # NOTE(review): _unclosed_elements and _applied_styles are class
        # attributes, so every parser instance works on the same two lists,
        # and end() pops _applied_styles only if the element opened tags
        # while start() pushes whenever a style is in effect. Styles of an
        # earlier <p> can therefore still be on the stack when a later one
        # is parsed. Left unchanged here because it affects rendered output.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: defaults, then the referenced style,
                # then the element's own tts: attributes
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip what the innermost applied style already has
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and run it through the SRT-rendering target
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style may refer to a parent that is
    # defined later in the document, so passes are repeated until no style
    # is left waiting for its parent.
    while True:
        resolved_before = len(styles)
        repeat = False
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            else:
                # Register styles without parent even if they set none of
                # the supported properties, so that styles inheriting from
                # them can be resolved
                styles.setdefault(style_id, {})
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Stop when everything is resolved, and also when a whole pass
        # resolved nothing new: the remaining styles refer to a missing
        # parent or to each other and would keep this loop running forever
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4671
4672
def cli_option(params, command_option, param):
    """Build the argument pair for a command line option that takes a value.

    params is a mapping of parameter names to values, command_option is the
    flag to emit and param is the key looked up in params.

    Returns [command_option, value] with the value converted to a string,
    or an empty list when the key is missing or its value is None.
    """
    value = params.get(param)
    if value is None:
        return []
    # Stringify unconditionally: falsy values such as 0 or False must not
    # end up in the argument list as non-string objects
    return [command_option, compat_str(value)]
4678
4679
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the arguments for a command line option carrying a boolean.

    The value stored under param in params is rendered as true_value or
    false_value. With a (truthy) separator a single joined argument
    "option<separator>value" is produced, otherwise the option and the
    value are two separate arguments. A missing/None value yields [].
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4688
4689
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4693
4694
def cli_configuration_args(argdict, key, default=[], exe=None, use_default_arg=True):
    """Select the extra command line arguments configured for key/exe.

    argdict maps lower-case names to argument sequences. The lookup order is:
      1. 'key+exe' (only when exe is given)
      2. key (skipped when key equals exe), followed by the arguments
         stored under exe when 'key+exe' was not present
      3. the 'default' entry of argdict (only if use_default_arg is truthy),
         falling back to the default parameter

    For backward compatibility argdict may also be a plain list/tuple; it is
    returned as-is when use_default_arg is True and ignored otherwise.
    When argdict is None (or ignored) default is returned.

    The result of a dict lookup is always a new list, even if the configured
    values are tuples.
    """
    # use_default_arg can be True, False, or 'no_compat'
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_default_arg is True:
            return argdict
        argdict = None

    if argdict is None:
        # Hand out a copy of list defaults so that a caller mutating the
        # result cannot corrupt the shared mutable default of this function
        return list(default) if isinstance(default, list) else default
    assert isinstance(argdict, dict)

    key = key.lower()
    args = exe_args = None
    if exe is not None:
        assert isinstance(exe, compat_str)
        exe = exe.lower()
        args = argdict.get('%s+%s' % (key, exe))
        if args is None:
            exe_args = argdict.get(exe)

    if args is None:
        args = argdict.get(key) if key != exe else None
    if args is None and exe_args is None:
        args = argdict.get('default', default) if use_default_arg else default

    args, exe_args = args or [], exe_args or []
    assert isinstance(args, (list, tuple))
    assert isinstance(exe_args, (list, tuple))
    # Normalise both parts to lists: tuples are accepted above, but
    # concatenating a tuple with a list raises TypeError
    return list(args) + list(exe_args)
4725
4726
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are used for the lookup;
        None is returned for unknown codes.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first key (in map iteration order) whose value equals
        code, or None if there is no such entry.
        """
        matches = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)
4930
4931
class ISO3166Utils(object):
    """Lookup of country names by their two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter ISO 3166 country code to the full country name

        The lookup is case-insensitive; None is returned for unknown codes.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
5190
5191
class GeoUtils(object):
    """Helpers for picking IPv4 addresses out of per-country blocks."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (dotted string) from a block.

        code_or_block is either a two-character country code, resolved
        case-insensitively through _country_ip_map, or an 'address/prefix'
        block. None is returned for a country code without a known block.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            # Two characters are always treated as a country code
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        network, prefix_len = block.split('/')
        (lowest,) = compat_struct_unpack('!L', socket.inet_aton(network))
        # Setting all host bits yields the highest address of the block
        highest = lowest | (0xffffffff >> int(prefix_len))
        chosen = random.randint(lowest, highest)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', chosen)))
5450
5451
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler whose proxy can be overridden for each single request.

    A request selects its own proxy through the 'Ytdl-request-proxy' header;
    the special value '__noproxy__' disables proxying for that request.
    """

    def __init__(self, proxies=None):
        # Set default handlers (they proxy nothing) before the base class
        # gets to process the given proxies
        def make_opener(scheme, meth):
            return lambda r, proxy='__noproxy__', type=scheme, meth=meth: meth(r, proxy, type)

        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme, make_opener(scheme, self.proxy_open))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Apply the proxy to req, honouring a per-request override.

        Returns None without touching the request when no proxy applies.
        For SOCKS proxies the proxy URL is only recorded in the
        'Ytdl-socks-proxy' header; everything else is delegated to the
        base class.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            # The header is internal and must not be sent over the wire
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy == '__noproxy__':
            return None  # No Proxy

        scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5475
5476
5477 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5478 # released into Public Domain
5479 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5480
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Values that are not positive produce a single zero byte.

    If optional blocksize is given and greater than zero, the front of the
    byte string is padded with binary zeros so that the length is a
    multiple of blocksize.
    """
    remaining = int(n)
    # Collect 32-bit words, least significant first
    words = []
    while remaining > 0:
        words.append(compat_struct_pack('>I', remaining & 0xffffffff))
        remaining >>= 32
    # Drop the leading zeros of the most significant word; an empty result
    # (nothing was packed) is represented by one zero byte
    raw = b''.join(reversed(words)).lstrip(b'\000') or b'\000'
    if blocksize > 0:
        overhang = len(raw) % blocksize
        if overhang:
            raw = (blocksize - overhang) * b'\000' + raw
    return raw
5509
5510
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes up to a whole number of 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s
    value = 0
    for offset in range(0, len(s), 4):
        (word,) = compat_struct_unpack('>I', s[offset:offset + 4])
        value = (value << 32) + word
    return value
5526
5527
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt data with OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    # The byte order of the data is reversed before it is read as a number
    reversed_hex = binascii.hexlify(data[::-1])
    plain_number = int(reversed_hex, 16)
    cipher_number = pow(plain_number, exponent, modulus)
    return '%x' % cipher_number
5543
5544
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data, where the
    padding consists of pseudo-random non-zero values.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not leave room for at least
                               8 padding values
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding must not contain zeros: the first zero after the [0, 2]
    # header marks the start of the data, so a zero inside the padding
    # would make the receiver cut the message at the wrong place
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5558
5559
def encode_base_n(num, n, table=None):
    """Convert the integer num to its string representation in base n.

    table supplies the digit characters; by default the first n characters
    of 0-9, a-z, A-Z are used. Negative numbers are rendered as '-'
    followed by the representation of their absolute value.

    Raises ValueError if n exceeds the table length, or if a non-zero num
    is requested in a base below 2.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    if n < 2:
        # Dividing by such a base never reduces the number to zero
        raise ValueError('base %d is not supported' % n)

    # Work on the absolute value: floor division of a negative number
    # never reaches zero and would loop forever
    remaining = abs(num)
    digits = []
    while remaining:
        remaining, digit = divmod(remaining, n)
        digits.append(table[digit])
    if num < 0:
        digits.append('-')
    # Digits were collected least significant first
    return ''.join(reversed(digits))
5576
5577
def decode_packed_codes(code):
    """Expand code that was packed with a base-n symbol table.

    The packed payload, the base, the symbol count and the '|'-separated
    symbols are extracted with PACKED_CODES_RE; every word of the payload
    is then replaced by the symbol it stands for.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base, remaining = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, base)
        # An empty symbol means that the token stands for itself
        symbol_table[token] = symbols[remaining] or token

    def _expand(word):
        return symbol_table[word.group(0)]

    return re.sub(r'\b(\w+)\b', _expand, obfuscated_code)
5594
5595
def caesar(s, alphabet, shift):
    """Apply a Caesar cipher to s.

    Every character of s found in alphabet is replaced by the character
    shift positions further in alphabet (wrapping around); all other
    characters are kept unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for char in s:
        if char in alphabet:
            char = alphabet[(alphabet.index(char) + shift) % size]
        shifted.append(char)
    return ''.join(shifted)
5603
5604
def rot47(s):
    """Rotate every character of s that is in the 94-character alphabet below by 47 places."""
    alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, alphabet, 47)
5607
5608
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict.

    Values may be enclosed in double quotes (and may then contain commas);
    the surrounding quotes are removed. All values are returned as strings.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    info = {}
    for match in re.finditer(attribute_re, attrib):
        key, val = match.group('key'), match.group('val')
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
5616
5617
def urshift(val, n):
    """Shift val right by n bits, treating negative values as unsigned 32-bit numbers."""
    if val < 0:
        # Reinterpret as unsigned 32-bit value first
        val += 0x100000000
    return val >> n
5620
5621
5622 # Based on png2str() written by @gdkchan and improved by @yokrysty
5623 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """
    Decode a PNG byte string into its dimensions and unfiltered scanlines.

    Every scanline is read as `width * 3` bytes, so the image is presumably
    expected to be 8-bit RGB without interlacing; the bit depth, colour type
    and interlace fields of IHDR are not inspected and chunk CRCs are not
    verified.

    @param png_data  The raw contents of the PNG file
    @returns         A tuple (width, height, pixels); `pixels` holds one list
                     per scanline, each with `width * 3` integers in 0..255
    @raises IOError  If the signature or the leading IHDR chunk is missing,
                     or if the file contains no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    stream = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or stream[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    struct_formats = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        # Big-endian unsigned integer stored in 1, 2 or 4 bytes
        return compat_struct_unpack(struct_formats[len(raw)], raw)[0]

    # Walk the chunks: 4-byte length, 4-byte type, data, 4-byte CRC
    chunks = []
    while stream:
        size = read_uint(stream[:4])
        chunks.append((stream[4:8], stream[8:8 + size]))
        stream = stream[12 + size:]  # Also skips the CRC

    ihdr = chunks[0][1]
    width, height = read_uint(ihdr[:4]), read_uint(ihdr[4:8])

    idat = b''.join(data for chunk_type, data in chunks if chunk_type == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    raw = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        row_start = y * (stride + 1)  # Every scanline starts with its filter byte
        filter_type = raw[row_start]
        above = pixels[y - 1] if y > 0 else None

        # The row is registered before it is filled; it is read back below
        # for the left-hand neighbour
        row = []
        pixels.append(row)

        for x in range(stride):
            value = raw[row_start + 1 + x]

            # Neighbours used by the filters; missing ones count as 0
            has_left = x > 2
            left = row[x - 3] if has_left else 0
            up = above[x] if above is not None else 0

            if filter_type == 1:  # Sub
                value = (value + left) & 0xff
            elif filter_type == 2:  # Up
                value = (value + up) & 0xff
            elif filter_type == 3:  # Average
                value = (value + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                up_left = above[x - 3] if has_left and above is not None else 0

                estimate = left + up - up_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_up_left = abs(estimate - up_left)

                # Use the neighbour closest to the estimate (ties: left, up)
                if dist_left <= dist_up and dist_left <= dist_up_left:
                    predictor = left
                elif dist_up <= dist_up_left:
                    predictor = up
                else:
                    predictor = up_left
                value = (value + predictor) & 0xff

            row.append(value)

    return width, height, pixels
5727
5728
def write_xattr(path, key, value):
    """
    Set the extended attribute `key` of the file `path` to `value`.

    The python `xattr` module is used when it can be imported. Otherwise the
    value is written to an NTFS Alternate Data Stream on Windows, or handed
    to the `setfattr`/`xattr` command-line tools elsewhere.

    @param value  Byte string; it is decoded as UTF-8 before being passed
                  to a command-line tool
    @raises XAttrUnavailableError  If pyxattr is too old or no tool is found
    @raises XAttrMetadataError     If setting the attribute fails
    """
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            stream_path = path + ':' + key
            try:
                with open(stream_path, 'wb') as stream:
                    stream.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        has_setfattr = check_executable('setfattr', ['--version'])
        has_xattr = check_executable('xattr', ['-h'])

        if not has_setfattr and not has_xattr:
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        # setfattr is preferred when both tools are present
        if has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd.extend(encodeArgument(opt) for opt in opts)
        cmd.append(encodeFilename(path, True))

        try:
            proc = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        _, stderr = process_communicate_or_kill(proc)
        stderr = stderr.decode('utf-8', 'replace')
        if proc.returncode != 0:
            raise XAttrMetadataError(proc.returncode, stderr)
5811
5812
def random_birthday(year_field, month_field, day_field):
    """
    Pick a random date between 1950-01-01 and 1995-12-31 (both inclusive).

    @returns  A dict mapping the three given field names to the year, month
              and day of that date, each as a string
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, (str(part) for part in date_parts)))
5823
5824
# Templates for internet shortcut files, which are plain text files.
# Each one is filled in with the `%` operator and a dict.

# .url file; expects the key 'url'
DOT_URL_LINK_TEMPLATE = (
    '[InternetShortcut]\n'
    'URL=%(url)s\n'
)

# .webloc file (an XML property list); expects the key 'url'
DOT_WEBLOC_LINK_TEMPLATE = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
    '<plist version="1.0">\n'
    '<dict>\n'
    '\t<key>URL</key>\n'
    '\t<string>%(url)s</string>\n'
    '</dict>\n'
    '</plist>\n'
)

# .desktop file; expects the keys 'filename' and 'url'
DOT_DESKTOP_LINK_TEMPLATE = (
    '[Desktop Entry]\n'
    'Encoding=UTF-8\n'
    'Name=%(filename)s\n'
    'Type=Link\n'
    'URL=%(url)s\n'
    'Icon=text-html\n'
)
5850
5851
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is kept, except for port 80 on the `http` scheme, for which it is the default. IRIs without a hostname (e.g. relative references) are supported.

    Raises ValueError for IPv6 hosts, which are not supported.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no host (relative reference, `mailto:`, ...)
    hostname = iri_parts.hostname
    if hostname:
        net_location += hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is only redundant for http; dropping it for any other scheme
    # (e.g. https) would make the URI point to a different port
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5894
5895
def to_high_limit_path(path):
    """
    Return `path` in a form that is not subject to MAX_PATH on Windows.

    On win32 and cygwin the absolute path is returned with the `\\\\?\\`
    prefix; on every other platform `path` is returned unchanged.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    long_path_prefix = r'\\?\ '.rstrip()
    return long_path_prefix + os.path.abspath(path)
5902
5903
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """
    Format `obj[field]` with `template`, or return `default`.

    @param obj       Mapping that is queried with `.get`
    @param field     Key to look up; a missing key yields `default`, which is
                     then treated like any other value
    @param template  %-format string applied to the value
    @param ignore    Values for which `default` is returned unformatted
    @param default   Fallback value
    @param func      Optional callable applied to the value before it is
                     formatted; its result is checked against `ignore` too
    """
    val = obj.get(field, default)
    if val in ignore:
        return default
    if func:
        val = func(val)
        if val in ignore:
            return default
    return template % val
5909
5910
def clean_podcast_url(url):
    """Remove the tracking/redirect prefixes matched below from `url`."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
5926
5927
5928 _HEX_TABLE = '0123456789abcdef'
5929
5930
5931 def random_uuidv4():
5932     return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5933
5934
def make_dir(path, to_screen=None):
    """
    Create the directory that is to contain `path`, if it does not exist.

    @param path       Path of a file; only its directory part is created
    @param to_screen  Optional callable that receives an error message when
                      the directory cannot be created
    @returns          True if the directory exists afterwards (or `path` has
                      no directory part), False if creating it failed
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Reporting is optional: only call `to_screen` when one was given
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
5945
5946
def get_executable_path():
    """
    Return the absolute path of the directory the program is run from.

    This is the directory of `sys.executable` for a frozen build, and
    otherwise a directory above the one containing this module: two levels
    up when the module was loaded by a zipimporter, one level up if not.
    """
    from zipimport import zipimporter

    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))

    # Running from ZIP requires climbing out of the archive as well
    from_zip = isinstance(globals().get('__loader__'), zipimporter)
    levels_up = '../..' if from_zip else '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), levels_up))
5956
5957
def load_plugins(name, type, namespace):
    """
    Load the plugin module `name` from the `ytdlp_plugins` directory.

    The directory is looked up next to the path returned by
    `get_executable_path()`. Every attribute of the module whose name ends
    with `type` is stored in `namespace` under that name.

    @returns  The list of collected attributes; empty if the module could
              not be imported
    """
    classes = []
    plugin_info = [None]
    try:
        search_path = [os.path.join(get_executable_path(), 'ytdlp_plugins')]
        plugin_info = imp.find_module(name, search_path)
        plugins = imp.load_module(name, *plugin_info)
        for attr_name in dir(plugins):
            if attr_name.endswith(type):
                klass = getattr(plugins, attr_name)
                classes.append(klass)
                namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # The first item returned by find_module is an open file (or None)
        module_file = plugin_info[0]
        if module_file is not None:
            module_file.close()
    return classes
5977
5978
def traverse_dict(dictn, keys, casesense=True):
    """
    Follow `keys` through nested dicts and return the value that is reached.

    @param dictn      The outermost dict
    @param keys       Sequence of keys, one per nesting level
    @param casesense  If false, keys are compared in lower case
    @returns          The value found, or None if a key is missing or a
                      non-dict is met before all keys are consumed
    """
    current, remaining = dictn, keys
    while True:
        if not isinstance(current, dict):
            return None
        key = remaining[0]
        if not casesense:
            current = {k.lower(): v for k, v in current.items()}
            key = key.lower()
        current = current.get(key, None)
        if len(remaining) < 2:
            return current
        remaining = remaining[1:]