yt_dlp/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import imp
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import operator
  26 import os
  27 import platform
  28 import random
  29 import re
  30 import socket
  31 import ssl
  32 import subprocess
  33 import sys
  34 import tempfile
  35 import time
  36 import traceback
  37 import xml.etree.ElementTree
  38 import zlib
  39
  40 from .compat import (
  41     compat_HTMLParseError,
  42     compat_HTMLParser,
  43     compat_basestring,
  44     compat_chr,
  45     compat_cookiejar,
  46     compat_ctypes_WINFUNCTYPE,
  47     compat_etree_fromstring,
  48     compat_expanduser,
  49     compat_html_entities,
  50     compat_html_entities_html5,
  51     compat_http_client,
  52     compat_integer_types,
  53     compat_numeric_types,
  54     compat_kwargs,
  55     compat_os_name,
  56     compat_parse_qs,
  57     compat_shlex_quote,
  58     compat_str,
  59     compat_struct_pack,
  60     compat_struct_unpack,
  61     compat_urllib_error,
  62     compat_urllib_parse,
  63     compat_urllib_parse_urlencode,
  64     compat_urllib_parse_urlparse,
  65     compat_urllib_parse_urlunparse,
  66     compat_urllib_parse_quote,
  67     compat_urllib_parse_quote_plus,
  68     compat_urllib_parse_unquote_plus,
  69     compat_urllib_request,
  70     compat_urlparse,
  71     compat_xpath,
  72 )
  73
  74 from .socks import (
  75     ProxyType,
  76     sockssocket,
  77 )
  78
  79
  80 def register_socks_protocols():
  81     # "Register" SOCKS protocols
  82     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  83     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  84     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  85         if scheme not in compat_urlparse.uses_netloc:
  86             compat_urlparse.uses_netloc.append(scheme)
  87
  88
  89 # This is not clearly defined otherwise
  90 compiled_regex_type = type(re.compile(''))
  91
  92
  93 def random_user_agent():
  94     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  95     _CHROME_VERSIONS = (
  96         '74.0.3729.129',
  97         '76.0.3780.3',
  98         '76.0.3780.2',
  99         '74.0.3729.128',
 100         '76.0.3780.1',
 101         '76.0.3780.0',
 102         '75.0.3770.15',
 103         '74.0.3729.127',
 104         '74.0.3729.126',
 105         '76.0.3779.1',
 106         '76.0.3779.0',
 107         '75.0.3770.14',
 108         '74.0.3729.125',
 109         '76.0.3778.1',
 110         '76.0.3778.0',
 111         '75.0.3770.13',
 112         '74.0.3729.124',
 113         '74.0.3729.123',
 114         '73.0.3683.121',
 115         '76.0.3777.1',
 116         '76.0.3777.0',
 117         '75.0.3770.12',
 118         '74.0.3729.122',
 119         '76.0.3776.4',
 120         '75.0.3770.11',
 121         '74.0.3729.121',
 122         '76.0.3776.3',
 123         '76.0.3776.2',
 124         '73.0.3683.120',
 125         '74.0.3729.120',
 126         '74.0.3729.119',
 127         '74.0.3729.118',
 128         '76.0.3776.1',
 129         '76.0.3776.0',
 130         '76.0.3775.5',
 131         '75.0.3770.10',
 132         '74.0.3729.117',
 133         '76.0.3775.4',
 134         '76.0.3775.3',
 135         '74.0.3729.116',
 136         '75.0.3770.9',
 137         '76.0.3775.2',
 138         '76.0.3775.1',
 139         '76.0.3775.0',
 140         '75.0.3770.8',
 141         '74.0.3729.115',
 142         '74.0.3729.114',
 143         '76.0.3774.1',
 144         '76.0.3774.0',
 145         '75.0.3770.7',
 146         '74.0.3729.113',
 147         '74.0.3729.112',
 148         '74.0.3729.111',
 149         '76.0.3773.1',
 150         '76.0.3773.0',
 151         '75.0.3770.6',
 152         '74.0.3729.110',
 153         '74.0.3729.109',
 154         '76.0.3772.1',
 155         '76.0.3772.0',
 156         '75.0.3770.5',
 157         '74.0.3729.108',
 158         '74.0.3729.107',
 159         '76.0.3771.1',
 160         '76.0.3771.0',
 161         '75.0.3770.4',
 162         '74.0.3729.106',
 163         '74.0.3729.105',
 164         '75.0.3770.3',
 165         '74.0.3729.104',
 166         '74.0.3729.103',
 167         '74.0.3729.102',
 168         '75.0.3770.2',
 169         '74.0.3729.101',
 170         '75.0.3770.1',
 171         '75.0.3770.0',
 172         '74.0.3729.100',
 173         '75.0.3769.5',
 174         '75.0.3769.4',
 175         '74.0.3729.99',
 176         '75.0.3769.3',
 177         '75.0.3769.2',
 178         '75.0.3768.6',
 179         '74.0.3729.98',
 180         '75.0.3769.1',
 181         '75.0.3769.0',
 182         '74.0.3729.97',
 183         '73.0.3683.119',
 184         '73.0.3683.118',
 185         '74.0.3729.96',
 186         '75.0.3768.5',
 187         '75.0.3768.4',
 188         '75.0.3768.3',
 189         '75.0.3768.2',
 190         '74.0.3729.95',
 191         '74.0.3729.94',
 192         '75.0.3768.1',
 193         '75.0.3768.0',
 194         '74.0.3729.93',
 195         '74.0.3729.92',
 196         '73.0.3683.117',
 197         '74.0.3729.91',
 198         '75.0.3766.3',
 199         '74.0.3729.90',
 200         '75.0.3767.2',
 201         '75.0.3767.1',
 202         '75.0.3767.0',
 203         '74.0.3729.89',
 204         '73.0.3683.116',
 205         '75.0.3766.2',
 206         '74.0.3729.88',
 207         '75.0.3766.1',
 208         '75.0.3766.0',
 209         '74.0.3729.87',
 210         '73.0.3683.115',
 211         '74.0.3729.86',
 212         '75.0.3765.1',
 213         '75.0.3765.0',
 214         '74.0.3729.85',
 215         '73.0.3683.114',
 216         '74.0.3729.84',
 217         '75.0.3764.1',
 218         '75.0.3764.0',
 219         '74.0.3729.83',
 220         '73.0.3683.113',
 221         '75.0.3763.2',
 222         '75.0.3761.4',
 223         '74.0.3729.82',
 224         '75.0.3763.1',
 225         '75.0.3763.0',
 226         '74.0.3729.81',
 227         '73.0.3683.112',
 228         '75.0.3762.1',
 229         '75.0.3762.0',
 230         '74.0.3729.80',
 231         '75.0.3761.3',
 232         '74.0.3729.79',
 233         '73.0.3683.111',
 234         '75.0.3761.2',
 235         '74.0.3729.78',
 236         '74.0.3729.77',
 237         '75.0.3761.1',
 238         '75.0.3761.0',
 239         '73.0.3683.110',
 240         '74.0.3729.76',
 241         '74.0.3729.75',
 242         '75.0.3760.0',
 243         '74.0.3729.74',
 244         '75.0.3759.8',
 245         '75.0.3759.7',
 246         '75.0.3759.6',
 247         '74.0.3729.73',
 248         '75.0.3759.5',
 249         '74.0.3729.72',
 250         '73.0.3683.109',
 251         '75.0.3759.4',
 252         '75.0.3759.3',
 253         '74.0.3729.71',
 254         '75.0.3759.2',
 255         '74.0.3729.70',
 256         '73.0.3683.108',
 257         '74.0.3729.69',
 258         '75.0.3759.1',
 259         '75.0.3759.0',
 260         '74.0.3729.68',
 261         '73.0.3683.107',
 262         '74.0.3729.67',
 263         '75.0.3758.1',
 264         '75.0.3758.0',
 265         '74.0.3729.66',
 266         '73.0.3683.106',
 267         '74.0.3729.65',
 268         '75.0.3757.1',
 269         '75.0.3757.0',
 270         '74.0.3729.64',
 271         '73.0.3683.105',
 272         '74.0.3729.63',
 273         '75.0.3756.1',
 274         '75.0.3756.0',
 275         '74.0.3729.62',
 276         '73.0.3683.104',
 277         '75.0.3755.3',
 278         '75.0.3755.2',
 279         '73.0.3683.103',
 280         '75.0.3755.1',
 281         '75.0.3755.0',
 282         '74.0.3729.61',
 283         '73.0.3683.102',
 284         '74.0.3729.60',
 285         '75.0.3754.2',
 286         '74.0.3729.59',
 287         '75.0.3753.4',
 288         '74.0.3729.58',
 289         '75.0.3754.1',
 290         '75.0.3754.0',
 291         '74.0.3729.57',
 292         '73.0.3683.101',
 293         '75.0.3753.3',
 294         '75.0.3752.2',
 295         '75.0.3753.2',
 296         '74.0.3729.56',
 297         '75.0.3753.1',
 298         '75.0.3753.0',
 299         '74.0.3729.55',
 300         '73.0.3683.100',
 301         '74.0.3729.54',
 302         '75.0.3752.1',
 303         '75.0.3752.0',
 304         '74.0.3729.53',
 305         '73.0.3683.99',
 306         '74.0.3729.52',
 307         '75.0.3751.1',
 308         '75.0.3751.0',
 309         '74.0.3729.51',
 310         '73.0.3683.98',
 311         '74.0.3729.50',
 312         '75.0.3750.0',
 313         '74.0.3729.49',
 314         '74.0.3729.48',
 315         '74.0.3729.47',
 316         '75.0.3749.3',
 317         '74.0.3729.46',
 318         '73.0.3683.97',
 319         '75.0.3749.2',
 320         '74.0.3729.45',
 321         '75.0.3749.1',
 322         '75.0.3749.0',
 323         '74.0.3729.44',
 324         '73.0.3683.96',
 325         '74.0.3729.43',
 326         '74.0.3729.42',
 327         '75.0.3748.1',
 328         '75.0.3748.0',
 329         '74.0.3729.41',
 330         '75.0.3747.1',
 331         '73.0.3683.95',
 332         '75.0.3746.4',
 333         '74.0.3729.40',
 334         '74.0.3729.39',
 335         '75.0.3747.0',
 336         '75.0.3746.3',
 337         '75.0.3746.2',
 338         '74.0.3729.38',
 339         '75.0.3746.1',
 340         '75.0.3746.0',
 341         '74.0.3729.37',
 342         '73.0.3683.94',
 343         '75.0.3745.5',
 344         '75.0.3745.4',
 345         '75.0.3745.3',
 346         '75.0.3745.2',
 347         '74.0.3729.36',
 348         '75.0.3745.1',
 349         '75.0.3745.0',
 350         '75.0.3744.2',
 351         '74.0.3729.35',
 352         '73.0.3683.93',
 353         '74.0.3729.34',
 354         '75.0.3744.1',
 355         '75.0.3744.0',
 356         '74.0.3729.33',
 357         '73.0.3683.92',
 358         '74.0.3729.32',
 359         '74.0.3729.31',
 360         '73.0.3683.91',
 361         '75.0.3741.2',
 362         '75.0.3740.5',
 363         '74.0.3729.30',
 364         '75.0.3741.1',
 365         '75.0.3741.0',
 366         '74.0.3729.29',
 367         '75.0.3740.4',
 368         '73.0.3683.90',
 369         '74.0.3729.28',
 370         '75.0.3740.3',
 371         '73.0.3683.89',
 372         '75.0.3740.2',
 373         '74.0.3729.27',
 374         '75.0.3740.1',
 375         '75.0.3740.0',
 376         '74.0.3729.26',
 377         '73.0.3683.88',
 378         '73.0.3683.87',
 379         '74.0.3729.25',
 380         '75.0.3739.1',
 381         '75.0.3739.0',
 382         '73.0.3683.86',
 383         '74.0.3729.24',
 384         '73.0.3683.85',
 385         '75.0.3738.4',
 386         '75.0.3738.3',
 387         '75.0.3738.2',
 388         '75.0.3738.1',
 389         '75.0.3738.0',
 390         '74.0.3729.23',
 391         '73.0.3683.84',
 392         '74.0.3729.22',
 393         '74.0.3729.21',
 394         '75.0.3737.1',
 395         '75.0.3737.0',
 396         '74.0.3729.20',
 397         '73.0.3683.83',
 398         '74.0.3729.19',
 399         '75.0.3736.1',
 400         '75.0.3736.0',
 401         '74.0.3729.18',
 402         '73.0.3683.82',
 403         '74.0.3729.17',
 404         '75.0.3735.1',
 405         '75.0.3735.0',
 406         '74.0.3729.16',
 407         '73.0.3683.81',
 408         '75.0.3734.1',
 409         '75.0.3734.0',
 410         '74.0.3729.15',
 411         '73.0.3683.80',
 412         '74.0.3729.14',
 413         '75.0.3733.1',
 414         '75.0.3733.0',
 415         '75.0.3732.1',
 416         '74.0.3729.13',
 417         '74.0.3729.12',
 418         '73.0.3683.79',
 419         '74.0.3729.11',
 420         '75.0.3732.0',
 421         '74.0.3729.10',
 422         '73.0.3683.78',
 423         '74.0.3729.9',
 424         '74.0.3729.8',
 425         '74.0.3729.7',
 426         '75.0.3731.3',
 427         '75.0.3731.2',
 428         '75.0.3731.0',
 429         '74.0.3729.6',
 430         '73.0.3683.77',
 431         '73.0.3683.76',
 432         '75.0.3730.5',
 433         '75.0.3730.4',
 434         '73.0.3683.75',
 435         '74.0.3729.5',
 436         '73.0.3683.74',
 437         '75.0.3730.3',
 438         '75.0.3730.2',
 439         '74.0.3729.4',
 440         '73.0.3683.73',
 441         '73.0.3683.72',
 442         '75.0.3730.1',
 443         '75.0.3730.0',
 444         '74.0.3729.3',
 445         '73.0.3683.71',
 446         '74.0.3729.2',
 447         '73.0.3683.70',
 448         '74.0.3729.1',
 449         '74.0.3729.0',
 450         '74.0.3726.4',
 451         '73.0.3683.69',
 452         '74.0.3726.3',
 453         '74.0.3728.0',
 454         '74.0.3726.2',
 455         '73.0.3683.68',
 456         '74.0.3726.1',
 457         '74.0.3726.0',
 458         '74.0.3725.4',
 459         '73.0.3683.67',
 460         '73.0.3683.66',
 461         '74.0.3725.3',
 462         '74.0.3725.2',
 463         '74.0.3725.1',
 464         '74.0.3724.8',
 465         '74.0.3725.0',
 466         '73.0.3683.65',
 467         '74.0.3724.7',
 468         '74.0.3724.6',
 469         '74.0.3724.5',
 470         '74.0.3724.4',
 471         '74.0.3724.3',
 472         '74.0.3724.2',
 473         '74.0.3724.1',
 474         '74.0.3724.0',
 475         '73.0.3683.64',
 476         '74.0.3723.1',
 477         '74.0.3723.0',
 478         '73.0.3683.63',
 479         '74.0.3722.1',
 480         '74.0.3722.0',
 481         '73.0.3683.62',
 482         '74.0.3718.9',
 483         '74.0.3702.3',
 484         '74.0.3721.3',
 485         '74.0.3721.2',
 486         '74.0.3721.1',
 487         '74.0.3721.0',
 488         '74.0.3720.6',
 489         '73.0.3683.61',
 490         '72.0.3626.122',
 491         '73.0.3683.60',
 492         '74.0.3720.5',
 493         '72.0.3626.121',
 494         '74.0.3718.8',
 495         '74.0.3720.4',
 496         '74.0.3720.3',
 497         '74.0.3718.7',
 498         '74.0.3720.2',
 499         '74.0.3720.1',
 500         '74.0.3720.0',
 501         '74.0.3718.6',
 502         '74.0.3719.5',
 503         '73.0.3683.59',
 504         '74.0.3718.5',
 505         '74.0.3718.4',
 506         '74.0.3719.4',
 507         '74.0.3719.3',
 508         '74.0.3719.2',
 509         '74.0.3719.1',
 510         '73.0.3683.58',
 511         '74.0.3719.0',
 512         '73.0.3683.57',
 513         '73.0.3683.56',
 514         '74.0.3718.3',
 515         '73.0.3683.55',
 516         '74.0.3718.2',
 517         '74.0.3718.1',
 518         '74.0.3718.0',
 519         '73.0.3683.54',
 520         '74.0.3717.2',
 521         '73.0.3683.53',
 522         '74.0.3717.1',
 523         '74.0.3717.0',
 524         '73.0.3683.52',
 525         '74.0.3716.1',
 526         '74.0.3716.0',
 527         '73.0.3683.51',
 528         '74.0.3715.1',
 529         '74.0.3715.0',
 530         '73.0.3683.50',
 531         '74.0.3711.2',
 532         '74.0.3714.2',
 533         '74.0.3713.3',
 534         '74.0.3714.1',
 535         '74.0.3714.0',
 536         '73.0.3683.49',
 537         '74.0.3713.1',
 538         '74.0.3713.0',
 539         '72.0.3626.120',
 540         '73.0.3683.48',
 541         '74.0.3712.2',
 542         '74.0.3712.1',
 543         '74.0.3712.0',
 544         '73.0.3683.47',
 545         '72.0.3626.119',
 546         '73.0.3683.46',
 547         '74.0.3710.2',
 548         '72.0.3626.118',
 549         '74.0.3711.1',
 550         '74.0.3711.0',
 551         '73.0.3683.45',
 552         '72.0.3626.117',
 553         '74.0.3710.1',
 554         '74.0.3710.0',
 555         '73.0.3683.44',
 556         '72.0.3626.116',
 557         '74.0.3709.1',
 558         '74.0.3709.0',
 559         '74.0.3704.9',
 560         '73.0.3683.43',
 561         '72.0.3626.115',
 562         '74.0.3704.8',
 563         '74.0.3704.7',
 564         '74.0.3708.0',
 565         '74.0.3706.7',
 566         '74.0.3704.6',
 567         '73.0.3683.42',
 568         '72.0.3626.114',
 569         '74.0.3706.6',
 570         '72.0.3626.113',
 571         '74.0.3704.5',
 572         '74.0.3706.5',
 573         '74.0.3706.4',
 574         '74.0.3706.3',
 575         '74.0.3706.2',
 576         '74.0.3706.1',
 577         '74.0.3706.0',
 578         '73.0.3683.41',
 579         '72.0.3626.112',
 580         '74.0.3705.1',
 581         '74.0.3705.0',
 582         '73.0.3683.40',
 583         '72.0.3626.111',
 584         '73.0.3683.39',
 585         '74.0.3704.4',
 586         '73.0.3683.38',
 587         '74.0.3704.3',
 588         '74.0.3704.2',
 589         '74.0.3704.1',
 590         '74.0.3704.0',
 591         '73.0.3683.37',
 592         '72.0.3626.110',
 593         '72.0.3626.109',
 594         '74.0.3703.3',
 595         '74.0.3703.2',
 596         '73.0.3683.36',
 597         '74.0.3703.1',
 598         '74.0.3703.0',
 599         '73.0.3683.35',
 600         '72.0.3626.108',
 601         '74.0.3702.2',
 602         '74.0.3699.3',
 603         '74.0.3702.1',
 604         '74.0.3702.0',
 605         '73.0.3683.34',
 606         '72.0.3626.107',
 607         '73.0.3683.33',
 608         '74.0.3701.1',
 609         '74.0.3701.0',
 610         '73.0.3683.32',
 611         '73.0.3683.31',
 612         '72.0.3626.105',
 613         '74.0.3700.1',
 614         '74.0.3700.0',
 615         '73.0.3683.29',
 616         '72.0.3626.103',
 617         '74.0.3699.2',
 618         '74.0.3699.1',
 619         '74.0.3699.0',
 620         '73.0.3683.28',
 621         '72.0.3626.102',
 622         '73.0.3683.27',
 623         '73.0.3683.26',
 624         '74.0.3698.0',
 625         '74.0.3696.2',
 626         '72.0.3626.101',
 627         '73.0.3683.25',
 628         '74.0.3696.1',
 629         '74.0.3696.0',
 630         '74.0.3694.8',
 631         '72.0.3626.100',
 632         '74.0.3694.7',
 633         '74.0.3694.6',
 634         '74.0.3694.5',
 635         '74.0.3694.4',
 636         '72.0.3626.99',
 637         '72.0.3626.98',
 638         '74.0.3694.3',
 639         '73.0.3683.24',
 640         '72.0.3626.97',
 641         '72.0.3626.96',
 642         '72.0.3626.95',
 643         '73.0.3683.23',
 644         '72.0.3626.94',
 645         '73.0.3683.22',
 646         '73.0.3683.21',
 647         '72.0.3626.93',
 648         '74.0.3694.2',
 649         '72.0.3626.92',
 650         '74.0.3694.1',
 651         '74.0.3694.0',
 652         '74.0.3693.6',
 653         '73.0.3683.20',
 654         '72.0.3626.91',
 655         '74.0.3693.5',
 656         '74.0.3693.4',
 657         '74.0.3693.3',
 658         '74.0.3693.2',
 659         '73.0.3683.19',
 660         '74.0.3693.1',
 661         '74.0.3693.0',
 662         '73.0.3683.18',
 663         '72.0.3626.90',
 664         '74.0.3692.1',
 665         '74.0.3692.0',
 666         '73.0.3683.17',
 667         '72.0.3626.89',
 668         '74.0.3687.3',
 669         '74.0.3691.1',
 670         '74.0.3691.0',
 671         '73.0.3683.16',
 672         '72.0.3626.88',
 673         '72.0.3626.87',
 674         '73.0.3683.15',
 675         '74.0.3690.1',
 676         '74.0.3690.0',
 677         '73.0.3683.14',
 678         '72.0.3626.86',
 679         '73.0.3683.13',
 680         '73.0.3683.12',
 681         '74.0.3689.1',
 682         '74.0.3689.0',
 683         '73.0.3683.11',
 684         '72.0.3626.85',
 685         '73.0.3683.10',
 686         '72.0.3626.84',
 687         '73.0.3683.9',
 688         '74.0.3688.1',
 689         '74.0.3688.0',
 690         '73.0.3683.8',
 691         '72.0.3626.83',
 692         '74.0.3687.2',
 693         '74.0.3687.1',
 694         '74.0.3687.0',
 695         '73.0.3683.7',
 696         '72.0.3626.82',
 697         '74.0.3686.4',
 698         '72.0.3626.81',
 699         '74.0.3686.3',
 700         '74.0.3686.2',
 701         '74.0.3686.1',
 702         '74.0.3686.0',
 703         '73.0.3683.6',
 704         '72.0.3626.80',
 705         '74.0.3685.1',
 706         '74.0.3685.0',
 707         '73.0.3683.5',
 708         '72.0.3626.79',
 709         '74.0.3684.1',
 710         '74.0.3684.0',
 711         '73.0.3683.4',
 712         '72.0.3626.78',
 713         '72.0.3626.77',
 714         '73.0.3683.3',
 715         '73.0.3683.2',
 716         '72.0.3626.76',
 717         '73.0.3683.1',
 718         '73.0.3683.0',
 719         '72.0.3626.75',
 720         '71.0.3578.141',
 721         '73.0.3682.1',
 722         '73.0.3682.0',
 723         '72.0.3626.74',
 724         '71.0.3578.140',
 725         '73.0.3681.4',
 726         '73.0.3681.3',
 727         '73.0.3681.2',
 728         '73.0.3681.1',
 729         '73.0.3681.0',
 730         '72.0.3626.73',
 731         '71.0.3578.139',
 732         '72.0.3626.72',
 733         '72.0.3626.71',
 734         '73.0.3680.1',
 735         '73.0.3680.0',
 736         '72.0.3626.70',
 737         '71.0.3578.138',
 738         '73.0.3678.2',
 739         '73.0.3679.1',
 740         '73.0.3679.0',
 741         '72.0.3626.69',
 742         '71.0.3578.137',
 743         '73.0.3678.1',
 744         '73.0.3678.0',
 745         '71.0.3578.136',
 746         '73.0.3677.1',
 747         '73.0.3677.0',
 748         '72.0.3626.68',
 749         '72.0.3626.67',
 750         '71.0.3578.135',
 751         '73.0.3676.1',
 752         '73.0.3676.0',
 753         '73.0.3674.2',
 754         '72.0.3626.66',
 755         '71.0.3578.134',
 756         '73.0.3674.1',
 757         '73.0.3674.0',
 758         '72.0.3626.65',
 759         '71.0.3578.133',
 760         '73.0.3673.2',
 761         '73.0.3673.1',
 762         '73.0.3673.0',
 763         '72.0.3626.64',
 764         '71.0.3578.132',
 765         '72.0.3626.63',
 766         '72.0.3626.62',
 767         '72.0.3626.61',
 768         '72.0.3626.60',
 769         '73.0.3672.1',
 770         '73.0.3672.0',
 771         '72.0.3626.59',
 772         '71.0.3578.131',
 773         '73.0.3671.3',
 774         '73.0.3671.2',
 775         '73.0.3671.1',
 776         '73.0.3671.0',
 777         '72.0.3626.58',
 778         '71.0.3578.130',
 779         '73.0.3670.1',
 780         '73.0.3670.0',
 781         '72.0.3626.57',
 782         '71.0.3578.129',
 783         '73.0.3669.1',
 784         '73.0.3669.0',
 785         '72.0.3626.56',
 786         '71.0.3578.128',
 787         '73.0.3668.2',
 788         '73.0.3668.1',
 789         '73.0.3668.0',
 790         '72.0.3626.55',
 791         '71.0.3578.127',
 792         '73.0.3667.2',
 793         '73.0.3667.1',
 794         '73.0.3667.0',
 795         '72.0.3626.54',
 796         '71.0.3578.126',
 797         '73.0.3666.1',
 798         '73.0.3666.0',
 799         '72.0.3626.53',
 800         '71.0.3578.125',
 801         '73.0.3665.4',
 802         '73.0.3665.3',
 803         '72.0.3626.52',
 804         '73.0.3665.2',
 805         '73.0.3664.4',
 806         '73.0.3665.1',
 807         '73.0.3665.0',
 808         '72.0.3626.51',
 809         '71.0.3578.124',
 810         '72.0.3626.50',
 811         '73.0.3664.3',
 812         '73.0.3664.2',
 813         '73.0.3664.1',
 814         '73.0.3664.0',
 815         '73.0.3663.2',
 816         '72.0.3626.49',
 817         '71.0.3578.123',
 818         '73.0.3663.1',
 819         '73.0.3663.0',
 820         '72.0.3626.48',
 821         '71.0.3578.122',
 822         '73.0.3662.1',
 823         '73.0.3662.0',
 824         '72.0.3626.47',
 825         '71.0.3578.121',
 826         '73.0.3661.1',
 827         '72.0.3626.46',
 828         '73.0.3661.0',
 829         '72.0.3626.45',
 830         '71.0.3578.120',
 831         '73.0.3660.2',
 832         '73.0.3660.1',
 833         '73.0.3660.0',
 834         '72.0.3626.44',
 835         '71.0.3578.119',
 836         '73.0.3659.1',
 837         '73.0.3659.0',
 838         '72.0.3626.43',
 839         '71.0.3578.118',
 840         '73.0.3658.1',
 841         '73.0.3658.0',
 842         '72.0.3626.42',
 843         '71.0.3578.117',
 844         '73.0.3657.1',
 845         '73.0.3657.0',
 846         '72.0.3626.41',
 847         '71.0.3578.116',
 848         '73.0.3656.1',
 849         '73.0.3656.0',
 850         '72.0.3626.40',
 851         '71.0.3578.115',
 852         '73.0.3655.1',
 853         '73.0.3655.0',
 854         '72.0.3626.39',
 855         '71.0.3578.114',
 856         '73.0.3654.1',
 857         '73.0.3654.0',
 858         '72.0.3626.38',
 859         '71.0.3578.113',
 860         '73.0.3653.1',
 861         '73.0.3653.0',
 862         '72.0.3626.37',
 863         '71.0.3578.112',
 864         '73.0.3652.1',
 865         '73.0.3652.0',
 866         '72.0.3626.36',
 867         '71.0.3578.111',
 868         '73.0.3651.1',
 869         '73.0.3651.0',
 870         '72.0.3626.35',
 871         '71.0.3578.110',
 872         '73.0.3650.1',
 873         '73.0.3650.0',
 874         '72.0.3626.34',
 875         '71.0.3578.109',
 876         '73.0.3649.1',
 877         '73.0.3649.0',
 878         '72.0.3626.33',
 879         '71.0.3578.108',
 880         '73.0.3648.2',
 881         '73.0.3648.1',
 882         '73.0.3648.0',
 883         '72.0.3626.32',
 884         '71.0.3578.107',
 885         '73.0.3647.2',
 886         '73.0.3647.1',
 887         '73.0.3647.0',
 888         '72.0.3626.31',
 889         '71.0.3578.106',
 890         '73.0.3635.3',
 891         '73.0.3646.2',
 892         '73.0.3646.1',
 893         '73.0.3646.0',
 894         '72.0.3626.30',
 895         '71.0.3578.105',
 896         '72.0.3626.29',
 897         '73.0.3645.2',
 898         '73.0.3645.1',
 899         '73.0.3645.0',
 900         '72.0.3626.28',
 901         '71.0.3578.104',
 902         '72.0.3626.27',
 903         '72.0.3626.26',
 904         '72.0.3626.25',
 905         '72.0.3626.24',
 906         '73.0.3644.0',
 907         '73.0.3643.2',
 908         '72.0.3626.23',
 909         '71.0.3578.103',
 910         '73.0.3643.1',
 911         '73.0.3643.0',
 912         '72.0.3626.22',
 913         '71.0.3578.102',
 914         '73.0.3642.1',
 915         '73.0.3642.0',
 916         '72.0.3626.21',
 917         '71.0.3578.101',
 918         '73.0.3641.1',
 919         '73.0.3641.0',
 920         '72.0.3626.20',
 921         '71.0.3578.100',
 922         '72.0.3626.19',
 923         '73.0.3640.1',
 924         '73.0.3640.0',
 925         '72.0.3626.18',
 926         '73.0.3639.1',
 927         '71.0.3578.99',
 928         '73.0.3639.0',
 929         '72.0.3626.17',
 930         '73.0.3638.2',
 931         '72.0.3626.16',
 932         '73.0.3638.1',
 933         '73.0.3638.0',
 934         '72.0.3626.15',
 935         '71.0.3578.98',
 936         '73.0.3635.2',
 937         '71.0.3578.97',
 938         '73.0.3637.1',
 939         '73.0.3637.0',
 940         '72.0.3626.14',
 941         '71.0.3578.96',
 942         '71.0.3578.95',
 943         '72.0.3626.13',
 944         '71.0.3578.94',
 945         '73.0.3636.2',
 946         '71.0.3578.93',
 947         '73.0.3636.1',
 948         '73.0.3636.0',
 949         '72.0.3626.12',
 950         '71.0.3578.92',
 951         '73.0.3635.1',
 952         '73.0.3635.0',
 953         '72.0.3626.11',
 954         '71.0.3578.91',
 955         '73.0.3634.2',
 956         '73.0.3634.1',
 957         '73.0.3634.0',
 958         '72.0.3626.10',
 959         '71.0.3578.90',
 960         '71.0.3578.89',
 961         '73.0.3633.2',
 962         '73.0.3633.1',
 963         '73.0.3633.0',
 964         '72.0.3610.4',
 965         '72.0.3626.9',
 966         '71.0.3578.88',
 967         '73.0.3632.5',
 968         '73.0.3632.4',
 969         '73.0.3632.3',
 970         '73.0.3632.2',
 971         '73.0.3632.1',
 972         '73.0.3632.0',
 973         '72.0.3626.8',
 974         '71.0.3578.87',
 975         '73.0.3631.2',
 976         '73.0.3631.1',
 977         '73.0.3631.0',
 978         '72.0.3626.7',
 979         '71.0.3578.86',
 980         '72.0.3626.6',
 981         '73.0.3630.1',
 982         '73.0.3630.0',
 983         '72.0.3626.5',
 984         '71.0.3578.85',
 985         '72.0.3626.4',
 986         '73.0.3628.3',
 987         '73.0.3628.2',
 988         '73.0.3629.1',
 989         '73.0.3629.0',
 990         '72.0.3626.3',
 991         '71.0.3578.84',
 992         '73.0.3628.1',
 993         '73.0.3628.0',
 994         '71.0.3578.83',
 995         '73.0.3627.1',
 996         '73.0.3627.0',
 997         '72.0.3626.2',
 998         '71.0.3578.82',
 999         '71.0.3578.81',
1000         '71.0.3578.80',
1001         '72.0.3626.1',
1002         '72.0.3626.0',
1003         '71.0.3578.79',
1004         '70.0.3538.124',
1005         '71.0.3578.78',
1006         '72.0.3623.4',
1007         '72.0.3625.2',
1008         '72.0.3625.1',
1009         '72.0.3625.0',
1010         '71.0.3578.77',
1011         '70.0.3538.123',
1012         '72.0.3624.4',
1013         '72.0.3624.3',
1014         '72.0.3624.2',
1015         '71.0.3578.76',
1016         '72.0.3624.1',
1017         '72.0.3624.0',
1018         '72.0.3623.3',
1019         '71.0.3578.75',
1020         '70.0.3538.122',
1021         '71.0.3578.74',
1022         '72.0.3623.2',
1023         '72.0.3610.3',
1024         '72.0.3623.1',
1025         '72.0.3623.0',
1026         '72.0.3622.3',
1027         '72.0.3622.2',
1028         '71.0.3578.73',
1029         '70.0.3538.121',
1030         '72.0.3622.1',
1031         '72.0.3622.0',
1032         '71.0.3578.72',
1033         '70.0.3538.120',
1034         '72.0.3621.1',
1035         '72.0.3621.0',
1036         '71.0.3578.71',
1037         '70.0.3538.119',
1038         '72.0.3620.1',
1039         '72.0.3620.0',
1040         '71.0.3578.70',
1041         '70.0.3538.118',
1042         '71.0.3578.69',
1043         '72.0.3619.1',
1044         '72.0.3619.0',
1045         '71.0.3578.68',
1046         '70.0.3538.117',
1047         '71.0.3578.67',
1048         '72.0.3618.1',
1049         '72.0.3618.0',
1050         '71.0.3578.66',
1051         '70.0.3538.116',
1052         '72.0.3617.1',
1053         '72.0.3617.0',
1054         '71.0.3578.65',
1055         '70.0.3538.115',
1056         '72.0.3602.3',
1057         '71.0.3578.64',
1058         '72.0.3616.1',
1059         '72.0.3616.0',
1060         '71.0.3578.63',
1061         '70.0.3538.114',
1062         '71.0.3578.62',
1063         '72.0.3615.1',
1064         '72.0.3615.0',
1065         '71.0.3578.61',
1066         '70.0.3538.113',
1067         '72.0.3614.1',
1068         '72.0.3614.0',
1069         '71.0.3578.60',
1070         '70.0.3538.112',
1071         '72.0.3613.1',
1072         '72.0.3613.0',
1073         '71.0.3578.59',
1074         '70.0.3538.111',
1075         '72.0.3612.2',
1076         '72.0.3612.1',
1077         '72.0.3612.0',
1078         '70.0.3538.110',
1079         '71.0.3578.58',
1080         '70.0.3538.109',
1081         '72.0.3611.2',
1082         '72.0.3611.1',
1083         '72.0.3611.0',
1084         '71.0.3578.57',
1085         '70.0.3538.108',
1086         '72.0.3610.2',
1087         '71.0.3578.56',
1088         '71.0.3578.55',
1089         '72.0.3610.1',
1090         '72.0.3610.0',
1091         '71.0.3578.54',
1092         '70.0.3538.107',
1093         '71.0.3578.53',
1094         '72.0.3609.3',
1095         '71.0.3578.52',
1096         '72.0.3609.2',
1097         '71.0.3578.51',
1098         '72.0.3608.5',
1099         '72.0.3609.1',
1100         '72.0.3609.0',
1101         '71.0.3578.50',
1102         '70.0.3538.106',
1103         '72.0.3608.4',
1104         '72.0.3608.3',
1105         '72.0.3608.2',
1106         '71.0.3578.49',
1107         '72.0.3608.1',
1108         '72.0.3608.0',
1109         '70.0.3538.105',
1110         '71.0.3578.48',
1111         '72.0.3607.1',
1112         '72.0.3607.0',
1113         '71.0.3578.47',
1114         '70.0.3538.104',
1115         '72.0.3606.2',
1116         '72.0.3606.1',
1117         '72.0.3606.0',
1118         '71.0.3578.46',
1119         '70.0.3538.103',
1120         '70.0.3538.102',
1121         '72.0.3605.3',
1122         '72.0.3605.2',
1123         '72.0.3605.1',
1124         '72.0.3605.0',
1125         '71.0.3578.45',
1126         '70.0.3538.101',
1127         '71.0.3578.44',
1128         '71.0.3578.43',
1129         '70.0.3538.100',
1130         '70.0.3538.99',
1131         '71.0.3578.42',
1132         '72.0.3604.1',
1133         '72.0.3604.0',
1134         '71.0.3578.41',
1135         '70.0.3538.98',
1136         '71.0.3578.40',
1137         '72.0.3603.2',
1138         '72.0.3603.1',
1139         '72.0.3603.0',
1140         '71.0.3578.39',
1141         '70.0.3538.97',
1142         '72.0.3602.2',
1143         '71.0.3578.38',
1144         '71.0.3578.37',
1145         '72.0.3602.1',
1146         '72.0.3602.0',
1147         '71.0.3578.36',
1148         '70.0.3538.96',
1149         '72.0.3601.1',
1150         '72.0.3601.0',
1151         '71.0.3578.35',
1152         '70.0.3538.95',
1153         '72.0.3600.1',
1154         '72.0.3600.0',
1155         '71.0.3578.34',
1156         '70.0.3538.94',
1157         '72.0.3599.3',
1158         '72.0.3599.2',
1159         '72.0.3599.1',
1160         '72.0.3599.0',
1161         '71.0.3578.33',
1162         '70.0.3538.93',
1163         '72.0.3598.1',
1164         '72.0.3598.0',
1165         '71.0.3578.32',
1166         '70.0.3538.87',
1167         '72.0.3597.1',
1168         '72.0.3597.0',
1169         '72.0.3596.2',
1170         '71.0.3578.31',
1171         '70.0.3538.86',
1172         '71.0.3578.30',
1173         '71.0.3578.29',
1174         '72.0.3596.1',
1175         '72.0.3596.0',
1176         '71.0.3578.28',
1177         '70.0.3538.85',
1178         '72.0.3595.2',
1179         '72.0.3591.3',
1180         '72.0.3595.1',
1181         '72.0.3595.0',
1182         '71.0.3578.27',
1183         '70.0.3538.84',
1184         '72.0.3594.1',
1185         '72.0.3594.0',
1186         '71.0.3578.26',
1187         '70.0.3538.83',
1188         '72.0.3593.2',
1189         '72.0.3593.1',
1190         '72.0.3593.0',
1191         '71.0.3578.25',
1192         '70.0.3538.82',
1193         '72.0.3589.3',
1194         '72.0.3592.2',
1195         '72.0.3592.1',
1196         '72.0.3592.0',
1197         '71.0.3578.24',
1198         '72.0.3589.2',
1199         '70.0.3538.81',
1200         '70.0.3538.80',
1201         '72.0.3591.2',
1202         '72.0.3591.1',
1203         '72.0.3591.0',
1204         '71.0.3578.23',
1205         '70.0.3538.79',
1206         '71.0.3578.22',
1207         '72.0.3590.1',
1208         '72.0.3590.0',
1209         '71.0.3578.21',
1210         '70.0.3538.78',
1211         '70.0.3538.77',
1212         '72.0.3589.1',
1213         '72.0.3589.0',
1214         '71.0.3578.20',
1215         '70.0.3538.76',
1216         '71.0.3578.19',
1217         '70.0.3538.75',
1218         '72.0.3588.1',
1219         '72.0.3588.0',
1220         '71.0.3578.18',
1221         '70.0.3538.74',
1222         '72.0.3586.2',
1223         '72.0.3587.0',
1224         '71.0.3578.17',
1225         '70.0.3538.73',
1226         '72.0.3586.1',
1227         '72.0.3586.0',
1228         '71.0.3578.16',
1229         '70.0.3538.72',
1230         '72.0.3585.1',
1231         '72.0.3585.0',
1232         '71.0.3578.15',
1233         '70.0.3538.71',
1234         '71.0.3578.14',
1235         '72.0.3584.1',
1236         '72.0.3584.0',
1237         '71.0.3578.13',
1238         '70.0.3538.70',
1239         '72.0.3583.2',
1240         '71.0.3578.12',
1241         '72.0.3583.1',
1242         '72.0.3583.0',
1243         '71.0.3578.11',
1244         '70.0.3538.69',
1245         '71.0.3578.10',
1246         '72.0.3582.0',
1247         '72.0.3581.4',
1248         '71.0.3578.9',
1249         '70.0.3538.67',
1250         '72.0.3581.3',
1251         '72.0.3581.2',
1252         '72.0.3581.1',
1253         '72.0.3581.0',
1254         '71.0.3578.8',
1255         '70.0.3538.66',
1256         '72.0.3580.1',
1257         '72.0.3580.0',
1258         '71.0.3578.7',
1259         '70.0.3538.65',
1260         '71.0.3578.6',
1261         '72.0.3579.1',
1262         '72.0.3579.0',
1263         '71.0.3578.5',
1264         '70.0.3538.64',
1265         '71.0.3578.4',
1266         '71.0.3578.3',
1267         '71.0.3578.2',
1268         '71.0.3578.1',
1269         '71.0.3578.0',
1270         '70.0.3538.63',
1271         '69.0.3497.128',
1272         '70.0.3538.62',
1273         '70.0.3538.61',
1274         '70.0.3538.60',
1275         '70.0.3538.59',
1276         '71.0.3577.1',
1277         '71.0.3577.0',
1278         '70.0.3538.58',
1279         '69.0.3497.127',
1280         '71.0.3576.2',
1281         '71.0.3576.1',
1282         '71.0.3576.0',
1283         '70.0.3538.57',
1284         '70.0.3538.56',
1285         '71.0.3575.2',
1286         '70.0.3538.55',
1287         '69.0.3497.126',
1288         '70.0.3538.54',
1289         '71.0.3575.1',
1290         '71.0.3575.0',
1291         '71.0.3574.1',
1292         '71.0.3574.0',
1293         '70.0.3538.53',
1294         '69.0.3497.125',
1295         '70.0.3538.52',
1296         '71.0.3573.1',
1297         '71.0.3573.0',
1298         '70.0.3538.51',
1299         '69.0.3497.124',
1300         '71.0.3572.1',
1301         '71.0.3572.0',
1302         '70.0.3538.50',
1303         '69.0.3497.123',
1304         '71.0.3571.2',
1305         '70.0.3538.49',
1306         '69.0.3497.122',
1307         '71.0.3571.1',
1308         '71.0.3571.0',
1309         '70.0.3538.48',
1310         '69.0.3497.121',
1311         '71.0.3570.1',
1312         '71.0.3570.0',
1313         '70.0.3538.47',
1314         '69.0.3497.120',
1315         '71.0.3568.2',
1316         '71.0.3569.1',
1317         '71.0.3569.0',
1318         '70.0.3538.46',
1319         '69.0.3497.119',
1320         '70.0.3538.45',
1321         '71.0.3568.1',
1322         '71.0.3568.0',
1323         '70.0.3538.44',
1324         '69.0.3497.118',
1325         '70.0.3538.43',
1326         '70.0.3538.42',
1327         '71.0.3567.1',
1328         '71.0.3567.0',
1329         '70.0.3538.41',
1330         '69.0.3497.117',
1331         '71.0.3566.1',
1332         '71.0.3566.0',
1333         '70.0.3538.40',
1334         '69.0.3497.116',
1335         '71.0.3565.1',
1336         '71.0.3565.0',
1337         '70.0.3538.39',
1338         '69.0.3497.115',
1339         '71.0.3564.1',
1340         '71.0.3564.0',
1341         '70.0.3538.38',
1342         '69.0.3497.114',
1343         '71.0.3563.0',
1344         '71.0.3562.2',
1345         '70.0.3538.37',
1346         '69.0.3497.113',
1347         '70.0.3538.36',
1348         '70.0.3538.35',
1349         '71.0.3562.1',
1350         '71.0.3562.0',
1351         '70.0.3538.34',
1352         '69.0.3497.112',
1353         '70.0.3538.33',
1354         '71.0.3561.1',
1355         '71.0.3561.0',
1356         '70.0.3538.32',
1357         '69.0.3497.111',
1358         '71.0.3559.6',
1359         '71.0.3560.1',
1360         '71.0.3560.0',
1361         '71.0.3559.5',
1362         '71.0.3559.4',
1363         '70.0.3538.31',
1364         '69.0.3497.110',
1365         '71.0.3559.3',
1366         '70.0.3538.30',
1367         '69.0.3497.109',
1368         '71.0.3559.2',
1369         '71.0.3559.1',
1370         '71.0.3559.0',
1371         '70.0.3538.29',
1372         '69.0.3497.108',
1373         '71.0.3558.2',
1374         '71.0.3558.1',
1375         '71.0.3558.0',
1376         '70.0.3538.28',
1377         '69.0.3497.107',
1378         '71.0.3557.2',
1379         '71.0.3557.1',
1380         '71.0.3557.0',
1381         '70.0.3538.27',
1382         '69.0.3497.106',
1383         '71.0.3554.4',
1384         '70.0.3538.26',
1385         '71.0.3556.1',
1386         '71.0.3556.0',
1387         '70.0.3538.25',
1388         '71.0.3554.3',
1389         '69.0.3497.105',
1390         '71.0.3554.2',
1391         '70.0.3538.24',
1392         '69.0.3497.104',
1393         '71.0.3555.2',
1394         '70.0.3538.23',
1395         '71.0.3555.1',
1396         '71.0.3555.0',
1397         '70.0.3538.22',
1398         '69.0.3497.103',
1399         '71.0.3554.1',
1400         '71.0.3554.0',
1401         '70.0.3538.21',
1402         '69.0.3497.102',
1403         '71.0.3553.3',
1404         '70.0.3538.20',
1405         '69.0.3497.101',
1406         '71.0.3553.2',
1407         '69.0.3497.100',
1408         '71.0.3553.1',
1409         '71.0.3553.0',
1410         '70.0.3538.19',
1411         '69.0.3497.99',
1412         '69.0.3497.98',
1413         '69.0.3497.97',
1414         '71.0.3552.6',
1415         '71.0.3552.5',
1416         '71.0.3552.4',
1417         '71.0.3552.3',
1418         '71.0.3552.2',
1419         '71.0.3552.1',
1420         '71.0.3552.0',
1421         '70.0.3538.18',
1422         '69.0.3497.96',
1423         '71.0.3551.3',
1424         '71.0.3551.2',
1425         '71.0.3551.1',
1426         '71.0.3551.0',
1427         '70.0.3538.17',
1428         '69.0.3497.95',
1429         '71.0.3550.3',
1430         '71.0.3550.2',
1431         '71.0.3550.1',
1432         '71.0.3550.0',
1433         '70.0.3538.16',
1434         '69.0.3497.94',
1435         '71.0.3549.1',
1436         '71.0.3549.0',
1437         '70.0.3538.15',
1438         '69.0.3497.93',
1439         '69.0.3497.92',
1440         '71.0.3548.1',
1441         '71.0.3548.0',
1442         '70.0.3538.14',
1443         '69.0.3497.91',
1444         '71.0.3547.1',
1445         '71.0.3547.0',
1446         '70.0.3538.13',
1447         '69.0.3497.90',
1448         '71.0.3546.2',
1449         '69.0.3497.89',
1450         '71.0.3546.1',
1451         '71.0.3546.0',
1452         '70.0.3538.12',
1453         '69.0.3497.88',
1454         '71.0.3545.4',
1455         '71.0.3545.3',
1456         '71.0.3545.2',
1457         '71.0.3545.1',
1458         '71.0.3545.0',
1459         '70.0.3538.11',
1460         '69.0.3497.87',
1461         '71.0.3544.5',
1462         '71.0.3544.4',
1463         '71.0.3544.3',
1464         '71.0.3544.2',
1465         '71.0.3544.1',
1466         '71.0.3544.0',
1467         '69.0.3497.86',
1468         '70.0.3538.10',
1469         '69.0.3497.85',
1470         '70.0.3538.9',
1471         '69.0.3497.84',
1472         '71.0.3543.4',
1473         '70.0.3538.8',
1474         '71.0.3543.3',
1475         '71.0.3543.2',
1476         '71.0.3543.1',
1477         '71.0.3543.0',
1478         '70.0.3538.7',
1479         '69.0.3497.83',
1480         '71.0.3542.2',
1481         '71.0.3542.1',
1482         '71.0.3542.0',
1483         '70.0.3538.6',
1484         '69.0.3497.82',
1485         '69.0.3497.81',
1486         '71.0.3541.1',
1487         '71.0.3541.0',
1488         '70.0.3538.5',
1489         '69.0.3497.80',
1490         '71.0.3540.1',
1491         '71.0.3540.0',
1492         '70.0.3538.4',
1493         '69.0.3497.79',
1494         '70.0.3538.3',
1495         '71.0.3539.1',
1496         '71.0.3539.0',
1497         '69.0.3497.78',
1498         '68.0.3440.134',
1499         '69.0.3497.77',
1500         '70.0.3538.2',
1501         '70.0.3538.1',
1502         '70.0.3538.0',
1503         '69.0.3497.76',
1504         '68.0.3440.133',
1505         '69.0.3497.75',
1506         '70.0.3537.2',
1507         '70.0.3537.1',
1508         '70.0.3537.0',
1509         '69.0.3497.74',
1510         '68.0.3440.132',
1511         '70.0.3536.0',
1512         '70.0.3535.5',
1513         '70.0.3535.4',
1514         '70.0.3535.3',
1515         '69.0.3497.73',
1516         '68.0.3440.131',
1517         '70.0.3532.8',
1518         '70.0.3532.7',
1519         '69.0.3497.72',
1520         '69.0.3497.71',
1521         '70.0.3535.2',
1522         '70.0.3535.1',
1523         '70.0.3535.0',
1524         '69.0.3497.70',
1525         '68.0.3440.130',
1526         '69.0.3497.69',
1527         '68.0.3440.129',
1528         '70.0.3534.4',
1529         '70.0.3534.3',
1530         '70.0.3534.2',
1531         '70.0.3534.1',
1532         '70.0.3534.0',
1533         '69.0.3497.68',
1534         '68.0.3440.128',
1535         '70.0.3533.2',
1536         '70.0.3533.1',
1537         '70.0.3533.0',
1538         '69.0.3497.67',
1539         '68.0.3440.127',
1540         '70.0.3532.6',
1541         '70.0.3532.5',
1542         '70.0.3532.4',
1543         '69.0.3497.66',
1544         '68.0.3440.126',
1545         '70.0.3532.3',
1546         '70.0.3532.2',
1547         '70.0.3532.1',
1548         '69.0.3497.60',
1549         '69.0.3497.65',
1550         '69.0.3497.64',
1551         '70.0.3532.0',
1552         '70.0.3531.0',
1553         '70.0.3530.4',
1554         '70.0.3530.3',
1555         '70.0.3530.2',
1556         '69.0.3497.58',
1557         '68.0.3440.125',
1558         '69.0.3497.57',
1559         '69.0.3497.56',
1560         '69.0.3497.55',
1561         '69.0.3497.54',
1562         '70.0.3530.1',
1563         '70.0.3530.0',
1564         '69.0.3497.53',
1565         '68.0.3440.124',
1566         '69.0.3497.52',
1567         '70.0.3529.3',
1568         '70.0.3529.2',
1569         '70.0.3529.1',
1570         '70.0.3529.0',
1571         '69.0.3497.51',
1572         '70.0.3528.4',
1573         '68.0.3440.123',
1574         '70.0.3528.3',
1575         '70.0.3528.2',
1576         '70.0.3528.1',
1577         '70.0.3528.0',
1578         '69.0.3497.50',
1579         '68.0.3440.122',
1580         '70.0.3527.1',
1581         '70.0.3527.0',
1582         '69.0.3497.49',
1583         '68.0.3440.121',
1584         '70.0.3526.1',
1585         '70.0.3526.0',
1586         '68.0.3440.120',
1587         '69.0.3497.48',
1588         '69.0.3497.47',
1589         '68.0.3440.119',
1590         '68.0.3440.118',
1591         '70.0.3525.5',
1592         '70.0.3525.4',
1593         '70.0.3525.3',
1594         '68.0.3440.117',
1595         '69.0.3497.46',
1596         '70.0.3525.2',
1597         '70.0.3525.1',
1598         '70.0.3525.0',
1599         '69.0.3497.45',
1600         '68.0.3440.116',
1601         '70.0.3524.4',
1602         '70.0.3524.3',
1603         '69.0.3497.44',
1604         '70.0.3524.2',
1605         '70.0.3524.1',
1606         '70.0.3524.0',
1607         '70.0.3523.2',
1608         '69.0.3497.43',
1609         '68.0.3440.115',
1610         '70.0.3505.9',
1611         '69.0.3497.42',
1612         '70.0.3505.8',
1613         '70.0.3523.1',
1614         '70.0.3523.0',
1615         '69.0.3497.41',
1616         '68.0.3440.114',
1617         '70.0.3505.7',
1618         '69.0.3497.40',
1619         '70.0.3522.1',
1620         '70.0.3522.0',
1621         '70.0.3521.2',
1622         '69.0.3497.39',
1623         '68.0.3440.113',
1624         '70.0.3505.6',
1625         '70.0.3521.1',
1626         '70.0.3521.0',
1627         '69.0.3497.38',
1628         '68.0.3440.112',
1629         '70.0.3520.1',
1630         '70.0.3520.0',
1631         '69.0.3497.37',
1632         '68.0.3440.111',
1633         '70.0.3519.3',
1634         '70.0.3519.2',
1635         '70.0.3519.1',
1636         '70.0.3519.0',
1637         '69.0.3497.36',
1638         '68.0.3440.110',
1639         '70.0.3518.1',
1640         '70.0.3518.0',
1641         '69.0.3497.35',
1642         '69.0.3497.34',
1643         '68.0.3440.109',
1644         '70.0.3517.1',
1645         '70.0.3517.0',
1646         '69.0.3497.33',
1647         '68.0.3440.108',
1648         '69.0.3497.32',
1649         '70.0.3516.3',
1650         '70.0.3516.2',
1651         '70.0.3516.1',
1652         '70.0.3516.0',
1653         '69.0.3497.31',
1654         '68.0.3440.107',
1655         '70.0.3515.4',
1656         '68.0.3440.106',
1657         '70.0.3515.3',
1658         '70.0.3515.2',
1659         '70.0.3515.1',
1660         '70.0.3515.0',
1661         '69.0.3497.30',
1662         '68.0.3440.105',
1663         '68.0.3440.104',
1664         '70.0.3514.2',
1665         '70.0.3514.1',
1666         '70.0.3514.0',
1667         '69.0.3497.29',
1668         '68.0.3440.103',
1669         '70.0.3513.1',
1670         '70.0.3513.0',
1671         '69.0.3497.28',
1672     )
1673     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
1676 std_headers = {
1677     'User-Agent': random_user_agent(),
1678     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1679     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1680     'Accept-Encoding': 'gzip, deflate',
1681     'Accept-Language': 'en-us,en;q=0.5',
1682 }
1683
1684
1685 USER_AGENTS = {
1686     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1687 }
1688
1689
1690 NO_DEFAULT = object()
1691
1692 ENGLISH_MONTH_NAMES = [
1693     'January', 'February', 'March', 'April', 'May', 'June',
1694     'July', 'August', 'September', 'October', 'November', 'December']
1695
1696 MONTH_NAMES = {
1697     'en': ENGLISH_MONTH_NAMES,
1698     'fr': [
1699         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1700         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1701 }
1702
1703 KNOWN_EXTENSIONS = (
1704     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1705     'flv', 'f4v', 'f4a', 'f4b',
1706     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1707     'mkv', 'mka', 'mk3d',
1708     'avi', 'divx',
1709     'mov',
1710     'asf', 'wmv', 'wma',
1711     '3gp', '3g2',
1712     'mp3',
1713     'flac',
1714     'ape',
1715     'wav',
1716     'f4f', 'f4m', 'm3u8', 'smil')
1717
1718 REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
1719
1720 # needed for sanitizing filenames in restricted mode
1721 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1722                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1723                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1724
1725 DATE_FORMATS = (
1726     '%d %B %Y',
1727     '%d %b %Y',
1728     '%B %d %Y',
1729     '%B %dst %Y',
1730     '%B %dnd %Y',
1731     '%B %drd %Y',
1732     '%B %dth %Y',
1733     '%b %d %Y',
1734     '%b %dst %Y',
1735     '%b %dnd %Y',
1736     '%b %drd %Y',
1737     '%b %dth %Y',
1738     '%b %dst %Y %I:%M',
1739     '%b %dnd %Y %I:%M',
1740     '%b %drd %Y %I:%M',
1741     '%b %dth %Y %I:%M',
1742     '%Y %m %d',
1743     '%Y-%m-%d',
1744     '%Y/%m/%d',
1745     '%Y/%m/%d %H:%M',
1746     '%Y/%m/%d %H:%M:%S',
1747     '%Y-%m-%d %H:%M',
1748     '%Y-%m-%d %H:%M:%S',
1749     '%Y-%m-%d %H:%M:%S.%f',
1750     '%d.%m.%Y %H:%M',
1751     '%d.%m.%Y %H.%M',
1752     '%Y-%m-%dT%H:%M:%SZ',
1753     '%Y-%m-%dT%H:%M:%S.%fZ',
1754     '%Y-%m-%dT%H:%M:%S.%f0Z',
1755     '%Y-%m-%dT%H:%M:%S',
1756     '%Y-%m-%dT%H:%M:%S.%f',
1757     '%Y-%m-%dT%H:%M',
1758     '%b %d %Y at %H:%M',
1759     '%b %d %Y at %H:%M:%S',
1760     '%B %d %Y at %H:%M',
1761     '%B %d %Y at %H:%M:%S',
1762 )
1763
1764 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1765 DATE_FORMATS_DAY_FIRST.extend([
1766     '%d-%m-%Y',
1767     '%d.%m.%Y',
1768     '%d.%m.%y',
1769     '%d/%m/%Y',
1770     '%d/%m/%y',
1771     '%d/%m/%Y %H:%M:%S',
1772 ])
1773
1774 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1775 DATE_FORMATS_MONTH_FIRST.extend([
1776     '%m-%d-%Y',
1777     '%m.%d.%Y',
1778     '%m/%d/%Y',
1779     '%m/%d/%y',
1780     '%m/%d/%Y %H:%M:%S',
1781 ])
1782
1783 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1784 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
1787 def preferredencoding():
1788     """Get preferred encoding.
1789
1790     Returns the best encoding scheme for the system, based on
1791     locale.getpreferredencoding() and some further tweaks.
1792     """
1793     try:
1794         pref = locale.getpreferredencoding()
1795         'TEST'.encode(pref)
1796     except Exception:
1797         pref = 'UTF-8'
1798
1799     return pref
1800
1801
1802 def write_json_file(obj, fn):
1803     """ Encode obj as JSON and write it to fn, atomically if possible """
1804
1805     fn = encodeFilename(fn)
1806     if sys.version_info < (3, 0) and sys.platform != 'win32':
1807         encoding = get_filesystem_encoding()
1808         # os.path.basename returns a bytes object, but NamedTemporaryFile
1809         # will fail if the filename contains non ascii characters unless we
1810         # use a unicode object
1811         path_basename = lambda f: os.path.basename(fn).decode(encoding)
1812         # the same for os.path.dirname
1813         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1814     else:
1815         path_basename = os.path.basename
1816         path_dirname = os.path.dirname
1817
1818     args = {
1819         'suffix': '.tmp',
1820         'prefix': path_basename(fn) + '.',
1821         'dir': path_dirname(fn),
1822         'delete': False,
1823     }
1824
1825     # In Python 2.x, json.dump expects a bytestream.
1826     # In Python 3.x, it writes to a character stream
1827     if sys.version_info < (3, 0):
1828         args['mode'] = 'wb'
1829     else:
1830         args.update({
1831             'mode': 'w',
1832             'encoding': 'utf-8',
1833         })
1834
1835     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1836
1837     try:
1838         with tf:
1839             json.dump(obj, tf)
1840         if sys.platform == 'win32':
1841             # Need to remove existing file on Windows, else os.rename raises
1842             # WindowsError or FileExistsError.
1843             try:
1844                 os.unlink(fn)
1845             except OSError:
1846                 pass
1847         try:
1848             mask = os.umask(0)
1849             os.umask(mask)
1850             os.chmod(tf.name, 0o666 & ~mask)
1851         except OSError:
1852             pass
1853         os.rename(tf.name, fn)
1854     except Exception:
1855         try:
1856             os.remove(tf.name)
1857         except OSError:
1858             pass
1859         raise
1860
1861
1862 if sys.version_info >= (2, 7):
1863     def find_xpath_attr(node, xpath, key, val=None):
1864         """ Find the xpath xpath[@key=val] """
1865         assert re.match(r'^[a-zA-Z_-]+$', key)
1866         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1867         return node.find(expr)
1868 else:
1869     def find_xpath_attr(node, xpath, key, val=None):
1870         for f in node.findall(compat_xpath(xpath)):
1871             if key not in f.attrib:
1872                 continue
1873             if val is None or f.attrib.get(key) == val:
1874                 return f
1875         return None
1876
1877 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1878 # the namespace parameter
1879
1880
1881 def xpath_with_ns(path, ns_map):
1882     components = [c.split(':') for c in path.split('/')]
1883     replaced = []
1884     for c in components:
1885         if len(c) == 1:
1886             replaced.append(c[0])
1887         else:
1888             ns, tag = c
1889             replaced.append('{%s}%s' % (ns_map[ns], tag))
1890     return '/'.join(replaced)
1891
1892
1893 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1894     def _find_xpath(xpath):
1895         return node.find(compat_xpath(xpath))
1896
1897     if isinstance(xpath, (str, compat_str)):
1898         n = _find_xpath(xpath)
1899     else:
1900         for xp in xpath:
1901             n = _find_xpath(xp)
1902             if n is not None:
1903                 break
1904
1905     if n is None:
1906         if default is not NO_DEFAULT:
1907             return default
1908         elif fatal:
1909             name = xpath if name is None else name
1910             raise ExtractorError('Could not find XML element %s' % name)
1911         else:
1912             return None
1913     return n
1914
1915
1916 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1917     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1918     if n is None or n == default:
1919         return n
1920     if n.text is None:
1921         if default is not NO_DEFAULT:
1922             return default
1923         elif fatal:
1924             name = xpath if name is None else name
1925             raise ExtractorError('Could not find XML element\'s text %s' % name)
1926         else:
1927             return None
1928     return n.text
1929
1930
1931 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1932     n = find_xpath_attr(node, xpath, key)
1933     if n is None:
1934         if default is not NO_DEFAULT:
1935             return default
1936         elif fatal:
1937             name = '%s[@%s]' % (xpath, key) if name is None else name
1938             raise ExtractorError('Could not find XML attribute %s' % name)
1939         else:
1940             return None
1941     return n.attrib[key]
1942
1943
1944 def get_element_by_id(id, html):
1945     """Return the content of the tag with the specified ID in the passed HTML document"""
1946     return get_element_by_attribute('id', id, html)
1947
1948
1949 def get_element_by_class(class_name, html):
1950     """Return the content of the first tag with the specified class in the passed HTML document"""
1951     retval = get_elements_by_class(class_name, html)
1952     return retval[0] if retval else None
1953
1954
1955 def get_element_by_attribute(attribute, value, html, escape_value=True):
1956     retval = get_elements_by_attribute(attribute, value, html, escape_value)
1957     return retval[0] if retval else None
1958
1959
1960 def get_elements_by_class(class_name, html):
1961     """Return the content of all tags with the specified class in the passed HTML document as a list"""
1962     return get_elements_by_attribute(
1963         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1964         html, escape_value=False)
1965
1966
1967 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1968     """Return the content of the tag with the specified attribute in the passed HTML document"""
1969
1970     value = re.escape(value) if escape_value else value
1971
1972     retlist = []
1973     for m in re.finditer(r'''(?xs)
1974         <([a-zA-Z0-9:._-]+)
1975          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1976          \s+%s=['"]?%s['"]?
1977          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1978         \s*>
1979         (?P<content>.*?)
1980         </\1>
1981     ''' % (re.escape(attribute), value), html):
1982         res = m.group('content')
1983
1984         if res.startswith('"') or res.startswith("'"):
1985             res = res[1:-1]
1986
1987         retlist.append(unescapeHTML(res))
1988
1989     return retlist
1990
1991
1992 class HTMLAttributeParser(compat_HTMLParser):
1993     """Trivial HTML parser to gather the attributes for a single element"""
1994
1995     def __init__(self):
1996         self.attrs = {}
1997         compat_HTMLParser.__init__(self)
1998
1999     def handle_starttag(self, tag, attrs):
2000         self.attrs = dict(attrs)
2001
2002
2003 def extract_attributes(html_element):
2004     """Given a string for an HTML element such as
2005     <el
2006          a="foo" B="bar" c="&98;az" d=boz
2007          empty= noval entity="&amp;"
2008          sq='"' dq="'"
2009     >
2010     Decode and return a dictionary of attributes.
2011     {
2012         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2013         'empty': '', 'noval': None, 'entity': '&',
2014         'sq': '"', 'dq': '\''
2015     }.
2016     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2017     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2018     """
2019     parser = HTMLAttributeParser()
2020     try:
2021         parser.feed(html_element)
2022         parser.close()
2023     # Older Python may throw HTMLParseError in case of malformed HTML
2024     except compat_HTMLParseError:
2025         pass
2026     return parser.attrs
2027
2028
2029 def clean_html(html):
2030     """Clean an HTML snippet into a readable string"""
2031
2032     if html is None:  # Convenience for sanitizing descriptions etc.
2033         return html
2034
2035     # Newline vs <br />
2036     html = html.replace('\n', ' ')
2037     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2038     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2039     # Strip html tags
2040     html = re.sub('<.*?>', '', html)
2041     # Replace html entities
2042     html = unescapeHTML(html)
2043     return html.strip()
2044
2045
2046 def sanitize_open(filename, open_mode):
2047     """Try to open the given filename, and slightly tweak it if this fails.
2048
2049     Attempts to open the given filename. If this fails, it tries to change
2050     the filename slightly, step by step, until it's either able to open it
2051     or it fails and raises a final exception, like the standard open()
2052     function.
2053
2054     It returns the tuple (stream, definitive_file_name).
2055     """
2056     try:
2057         if filename == '-':
2058             if sys.platform == 'win32':
2059                 import msvcrt
2060                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2061             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2062         stream = open(encodeFilename(filename), open_mode)
2063         return (stream, filename)
2064     except (IOError, OSError) as err:
2065         if err.errno in (errno.EACCES,):
2066             raise
2067
2068         # In case of error, try to remove win32 forbidden chars
2069         alt_filename = sanitize_path(filename)
2070         if alt_filename == filename:
2071             raise
2072         else:
2073             # An exception here should be caught in the caller
2074             stream = open(encodeFilename(alt_filename), open_mode)
2075             return (stream, alt_filename)
2076
2077
2078 def timeconvert(timestr):
2079     """Convert RFC 2822 defined time string into system timestamp"""
2080     timestamp = None
2081     timetuple = email.utils.parsedate_tz(timestr)
2082     if timetuple is not None:
2083         timestamp = email.utils.mktime_tz(timetuple)
2084     return timestamp
2085
2086
2087 def sanitize_filename(s, restricted=False, is_id=False):
2088     """Sanitizes a string so it could be used as part of a filename.
2089     If restricted is set, use a stricter subset of allowed characters.
2090     Set is_id if this is not an arbitrary string, but an ID that should be kept
2091     if possible.
2092     """
2093     def replace_insane(char):
2094         if restricted and char in ACCENT_CHARS:
2095             return ACCENT_CHARS[char]
2096         if char == '?' or ord(char) < 32 or ord(char) == 127:
2097             return ''
2098         elif char == '"':
2099             return '' if restricted else '\''
2100         elif char == ':':
2101             return '_-' if restricted else ' -'
2102         elif char in '\\/|*<>':
2103             return '_'
2104         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2105             return '_'
2106         if restricted and ord(char) > 127:
2107             return '_'
2108         return char
2109
2110     # Handle timestamps
2111     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
2112     result = ''.join(map(replace_insane, s))
2113     if not is_id:
2114         while '__' in result:
2115             result = result.replace('__', '_')
2116         result = result.strip('_')
2117         # Common case of "Foreign band name - English song title"
2118         if restricted and result.startswith('-_'):
2119             result = result[2:]
2120         if result.startswith('-'):
2121             result = '_' + result[len('-'):]
2122         result = result.lstrip('.')
2123         if not result:
2124             result = '_'
2125     return result
2126
2127
2128 def sanitize_path(s, force=False):
2129     """Sanitizes and normalizes path on Windows"""
2130     if sys.platform == 'win32':
2131         force = False
2132         drive_or_unc, _ = os.path.splitdrive(s)
2133         if sys.version_info < (2, 7) and not drive_or_unc:
2134             drive_or_unc, _ = os.path.splitunc(s)
2135     elif force:
2136         drive_or_unc = ''
2137     else:
2138         return s
2139
2140     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2141     if drive_or_unc:
2142         norm_path.pop(0)
2143     sanitized_path = [
2144         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
2145         for path_part in norm_path]
2146     if drive_or_unc:
2147         sanitized_path.insert(0, drive_or_unc + os.path.sep)
2148     elif force and s[0] == os.path.sep:
2149         sanitized_path.insert(0, os.path.sep)
2150     return os.path.join(*sanitized_path)
2151
2152
2153 def sanitize_url(url):
2154     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2155     # the number of unwanted failures due to missing protocol
2156     if url.startswith('//'):
2157         return 'http:%s' % url
2158     # Fix some common typos seen so far
2159     COMMON_TYPOS = (
2160         # https://github.com/ytdl-org/youtube-dl/issues/15649
2161         (r'^httpss://', r'https://'),
2162         # https://bx1.be/lives/direct-tv/
2163         (r'^rmtp([es]?)://', r'rtmp\1://'),
2164     )
2165     for mistake, fixup in COMMON_TYPOS:
2166         if re.match(mistake, url):
2167             return re.sub(mistake, fixup, url)
2168     return url
2169
2170
2171 def sanitized_Request(url, *args, **kwargs):
2172     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2173
2174
2175 def expand_path(s):
2176     """Expand shell variables and ~"""
2177     return os.path.expandvars(compat_expanduser(s))
2178
2179
2180 def orderedSet(iterable):
2181     """ Remove all duplicates from the input iterable """
2182     res = []
2183     for el in iterable:
2184         if el not in res:
2185             res.append(el)
2186     return res
2187
2188
2189 def _htmlentity_transform(entity_with_semicolon):
2190     """Transforms an HTML entity to a character."""
2191     entity = entity_with_semicolon[:-1]
2192
2193     # Known non-numeric HTML entity
2194     if entity in compat_html_entities.name2codepoint:
2195         return compat_chr(compat_html_entities.name2codepoint[entity])
2196
2197     # TODO: HTML5 allows entities without a semicolon. For example,
2198     # '&Eacuteric' should be decoded as 'Éric'.
2199     if entity_with_semicolon in compat_html_entities_html5:
2200         return compat_html_entities_html5[entity_with_semicolon]
2201
2202     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2203     if mobj is not None:
2204         numstr = mobj.group(1)
2205         if numstr.startswith('x'):
2206             base = 16
2207             numstr = '0%s' % numstr
2208         else:
2209             base = 10
2210         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2211         try:
2212             return compat_chr(int(numstr, base))
2213         except ValueError:
2214             pass
2215
2216     # Unknown entity in name, return its literal representation
2217     return '&%s;' % entity
2218
2219
2220 def unescapeHTML(s):
2221     if s is None:
2222         return None
2223     assert type(s) == compat_str
2224
2225     return re.sub(
2226         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2227
2228
2229 def process_communicate_or_kill(p, *args, **kwargs):
2230     try:
2231         return p.communicate(*args, **kwargs)
2232     except BaseException:  # Including KeyboardInterrupt
2233         p.kill()
2234         p.wait()
2235         raise
2236
2237
2238 def get_subprocess_encoding():
2239     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2240         # For subprocess calls, encode with locale encoding
2241         # Refer to http://stackoverflow.com/a/9951851/35070
2242         encoding = preferredencoding()
2243     else:
2244         encoding = sys.getfilesystemencoding()
2245     if encoding is None:
2246         encoding = 'utf-8'
2247     return encoding
2248
2249
2250 def encodeFilename(s, for_subprocess=False):
2251     """
2252     @param s The name of the file
2253     """
2254
2255     assert type(s) == compat_str
2256
2257     # Python 3 has a Unicode API
2258     if sys.version_info >= (3, 0):
2259         return s
2260
2261     # Pass '' directly to use Unicode APIs on Windows 2000 and up
2262     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2263     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2264     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2265         return s
2266
2267     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2268     if sys.platform.startswith('java'):
2269         return s
2270
2271     return s.encode(get_subprocess_encoding(), 'ignore')
2272
2273
2274 def decodeFilename(b, for_subprocess=False):
2275
2276     if sys.version_info >= (3, 0):
2277         return b
2278
2279     if not isinstance(b, bytes):
2280         return b
2281
2282     return b.decode(get_subprocess_encoding(), 'ignore')
2283
2284
2285 def encodeArgument(s):
2286     if not isinstance(s, compat_str):
2287         # Legacy code that uses byte strings
2288         # Uncomment the following line after fixing all post processors
2289         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2290         s = s.decode('ascii')
2291     return encodeFilename(s, True)
2292
2293
2294 def decodeArgument(b):
2295     return decodeFilename(b, True)
2296
2297
2298 def decodeOption(optval):
2299     if optval is None:
2300         return optval
2301     if isinstance(optval, bytes):
2302         optval = optval.decode(preferredencoding())
2303
2304     assert isinstance(optval, compat_str)
2305     return optval
2306
2307
2308 def formatSeconds(secs, delim=':'):
2309     if secs > 3600:
2310         return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
2311     elif secs > 60:
2312         return '%d%s%02d' % (secs // 60, delim, secs % 60)
2313     else:
2314         return '%d' % secs
2315
2316
2317 def make_HTTPS_handler(params, **kwargs):
2318     opts_no_check_certificate = params.get('nocheckcertificate', False)
2319     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
2320         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
2321         if opts_no_check_certificate:
2322             context.check_hostname = False
2323             context.verify_mode = ssl.CERT_NONE
2324         try:
2325             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2326         except TypeError:
2327             # Python 2.7.8
2328             # (create_default_context present but HTTPSHandler has no context=)
2329             pass
2330
2331     if sys.version_info < (3, 2):
2332         return YoutubeDLHTTPSHandler(params, **kwargs)
2333     else:  # Python < 3.4
2334         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
2335         context.verify_mode = (ssl.CERT_NONE
2336                                if opts_no_check_certificate
2337                                else ssl.CERT_REQUIRED)
2338         context.set_default_verify_paths()
2339         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2340
2341
2342 def bug_reports_message():
2343     if ytdl_is_updateable():
2344         update_cmd = 'type  yt-dlp -U  to update'
2345     else:
2346         update_cmd = 'see  https://github.com/yt-dlp/yt-dlp  on how to update'
2347     msg = '; please report this issue on https://github.com/yt-dlp/yt-dlp .'
2348     msg += ' Make sure you are using the latest version; %s.' % update_cmd
2349     msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
2350     return msg
2351
2352
2353 class YoutubeDLError(Exception):
2354     """Base exception for YoutubeDL errors."""
2355     pass
2356
2357
2358 class ExtractorError(YoutubeDLError):
2359     """Error during info extraction."""
2360
2361     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
2362         """ tb, if given, is the original traceback (so that it can be printed out).
2363         If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
2364         """
2365
2366         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
2367             expected = True
2368         if video_id is not None:
2369             msg = video_id + ': ' + msg
2370         if cause:
2371             msg += ' (caused by %r)' % cause
2372         if not expected:
2373             msg += bug_reports_message()
2374         super(ExtractorError, self).__init__(msg)
2375
2376         self.traceback = tb
2377         self.exc_info = sys.exc_info()  # preserve original exception
2378         self.cause = cause
2379         self.video_id = video_id
2380
2381     def format_traceback(self):
2382         if self.traceback is None:
2383             return None
2384         return ''.join(traceback.format_tb(self.traceback))
2385
2386
2387 class UnsupportedError(ExtractorError):
2388     def __init__(self, url):
2389         super(UnsupportedError, self).__init__(
2390             'Unsupported URL: %s' % url, expected=True)
2391         self.url = url
2392
2393
2394 class RegexNotFoundError(ExtractorError):
2395     """Error when a regex didn't match"""
2396     pass
2397
2398
2399 class GeoRestrictedError(ExtractorError):
2400     """Geographic restriction Error exception.
2401
2402     This exception may be thrown when a video is not available from your
2403     geographic location due to geographic restrictions imposed by a website.
2404     """
2405
2406     def __init__(self, msg, countries=None):
2407         super(GeoRestrictedError, self).__init__(msg, expected=True)
2408         self.msg = msg
2409         self.countries = countries
2410
2411
2412 class DownloadError(YoutubeDLError):
2413     """Download Error exception.
2414
2415     This exception may be thrown by FileDownloader objects if they are not
2416     configured to continue on errors. They will contain the appropriate
2417     error message.
2418     """
2419
2420     def __init__(self, msg, exc_info=None):
2421         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2422         super(DownloadError, self).__init__(msg)
2423         self.exc_info = exc_info
2424
2425
2426 class SameFileError(YoutubeDLError):
2427     """Same File exception.
2428
2429     This exception will be thrown by FileDownloader objects if they detect
2430     multiple files would have to be downloaded to the same file on disk.
2431     """
2432     pass
2433
2434
2435 class PostProcessingError(YoutubeDLError):
2436     """Post Processing exception.
2437
2438     This exception may be raised by PostProcessor's .run() method to
2439     indicate an error in the postprocessing task.
2440     """
2441
2442     def __init__(self, msg):
2443         super(PostProcessingError, self).__init__(msg)
2444         self.msg = msg
2445
2446
2447 class ExistingVideoReached(YoutubeDLError):
2448     """ --max-downloads limit has been reached. """
2449     pass
2450
2451
2452 class RejectedVideoReached(YoutubeDLError):
2453     """ --max-downloads limit has been reached. """
2454     pass
2455
2456
2457 class MaxDownloadsReached(YoutubeDLError):
2458     """ --max-downloads limit has been reached. """
2459     pass
2460
2461
2462 class UnavailableVideoError(YoutubeDLError):
2463     """Unavailable Format exception.
2464
2465     This exception will be thrown when a video is requested
2466     in a format that is not available for that video.
2467     """
2468     pass
2469
2470
2471 class ContentTooShortError(YoutubeDLError):
2472     """Content Too Short exception.
2473
2474     This exception may be raised by FileDownloader objects when a file they
2475     download is too small for what the server announced first, indicating
2476     the connection was probably interrupted.
2477     """
2478
2479     def __init__(self, downloaded, expected):
2480         super(ContentTooShortError, self).__init__(
2481             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2482         )
2483         # Both in bytes
2484         self.downloaded = downloaded
2485         self.expected = expected
2486
2487
2488 class XAttrMetadataError(YoutubeDLError):
2489     def __init__(self, code=None, msg='Unknown error'):
2490         super(XAttrMetadataError, self).__init__(msg)
2491         self.code = code
2492         self.msg = msg
2493
2494         # Parsing code and msg
2495         if (self.code in (errno.ENOSPC, errno.EDQUOT)
2496                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
2497             self.reason = 'NO_SPACE'
2498         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2499             self.reason = 'VALUE_TOO_LONG'
2500         else:
2501             self.reason = 'NOT_SUPPORTED'
2502
2503
2504 class XAttrUnavailableError(YoutubeDLError):
2505     pass
2506
2507
2508 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
2509     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2510     # expected HTTP responses to meet HTTP/1.0 or later (see also
2511     # https://github.com/ytdl-org/youtube-dl/issues/6727)
2512     if sys.version_info < (3, 0):
2513         kwargs['strict'] = True
2514     hc = http_class(*args, **compat_kwargs(kwargs))
2515     source_address = ydl_handler._params.get('source_address')
2516
2517     if source_address is not None:
2518         # This is to workaround _create_connection() from socket where it will try all
2519         # address data from getaddrinfo() including IPv6. This filters the result from
2520         # getaddrinfo() based on the source_address value.
2521         # This is based on the cpython socket.create_connection() function.
2522         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2523         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2524             host, port = address
2525             err = None
2526             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
2527             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2528             ip_addrs = [addr for addr in addrs if addr[0] == af]
2529             if addrs and not ip_addrs:
2530                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2531                 raise socket.error(
2532                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
2533                     % (ip_version, source_address[0]))
2534             for res in ip_addrs:
2535                 af, socktype, proto, canonname, sa = res
2536                 sock = None
2537                 try:
2538                     sock = socket.socket(af, socktype, proto)
2539                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2540                         sock.settimeout(timeout)
2541                     sock.bind(source_address)
2542                     sock.connect(sa)
2543                     err = None  # Explicitly break reference cycle
2544                     return sock
2545                 except socket.error as _:
2546                     err = _
2547                     if sock is not None:
2548                         sock.close()
2549             if err is not None:
2550                 raise err
2551             else:
2552                 raise socket.error('getaddrinfo returns an empty list')
2553         if hasattr(hc, '_create_connection'):
2554             hc._create_connection = _create_connection
2555         sa = (source_address, 0)
2556         if hasattr(hc, 'source_address'):  # Python 2.7+
2557             hc.source_address = sa
2558         else:  # Python 2.6
2559             def _hc_connect(self, *args, **kwargs):
2560                 sock = _create_connection(
2561                     (self.host, self.port), self.timeout, sa)
2562                 if is_https:
2563                     self.sock = ssl.wrap_socket(
2564                         sock, self.key_file, self.cert_file,
2565                         ssl_version=ssl.PROTOCOL_TLSv1)
2566                 else:
2567                     self.sock = sock
2568             hc.connect = functools.partial(_hc_connect, hc)
2569
2570     return hc
2571
2572
2573 def handle_youtubedl_headers(headers):
2574     filtered_headers = headers
2575
2576     if 'Youtubedl-no-compression' in filtered_headers:
2577         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
2578         del filtered_headers['Youtubedl-no-compression']
2579
2580     return filtered_headers
2581
2582
2583 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2584     """Handler for HTTP requests and responses.
2585
2586     This class, when installed with an OpenerDirector, automatically adds
2587     the standard headers to every HTTP request and handles gzipped and
2588     deflated responses from web servers. If compression is to be avoided in
2589     a particular request, the original request in the program code only has
2590     to include the HTTP header "Youtubedl-no-compression", which will be
2591     removed before making the real request.
2592
2593     Part of this code was copied from:
2594
2595     http://techknack.net/python-urllib2-handlers/
2596
2597     Andrew Rowls, the author of that code, agreed to release it to the
2598     public domain.
2599     """
2600
2601     def __init__(self, params, *args, **kwargs):
2602         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2603         self._params = params
2604
2605     def http_open(self, req):
2606         conn_class = compat_http_client.HTTPConnection
2607
2608         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2609         if socks_proxy:
2610             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2611             del req.headers['Ytdl-socks-proxy']
2612
2613         return self.do_open(functools.partial(
2614             _create_http_connection, self, conn_class, False),
2615             req)
2616
2617     @staticmethod
2618     def deflate(data):
2619         if not data:
2620             return data
2621         try:
2622             return zlib.decompress(data, -zlib.MAX_WBITS)
2623         except zlib.error:
2624             return zlib.decompress(data)
2625
2626     def http_request(self, req):
2627         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2628         # always respected by websites, some tend to give out URLs with non percent-encoded
2629         # non-ASCII characters (see telemb.py, ard.py [#3412])
2630         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2631         # To work around aforementioned issue we will replace request's original URL with
2632         # percent-encoded one
2633         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2634         # the code of this workaround has been moved here from YoutubeDL.urlopen()
2635         url = req.get_full_url()
2636         url_escaped = escape_url(url)
2637
2638         # Substitute URL if any change after escaping
2639         if url != url_escaped:
2640             req = update_Request(req, url=url_escaped)
2641
2642         for h, v in std_headers.items():
2643             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2644             # The dict keys are capitalized because of this bug by urllib
2645             if h.capitalize() not in req.headers:
2646                 req.add_header(h, v)
2647
2648         req.headers = handle_youtubedl_headers(req.headers)
2649
2650         if sys.version_info < (2, 7) and '#' in req.get_full_url():
2651             # Python 2.6 is brain-dead when it comes to fragments
2652             req._Request__original = req._Request__original.partition('#')[0]
2653             req._Request__r_type = req._Request__r_type.partition('#')[0]
2654
2655         return req
2656
2657     def http_response(self, req, resp):
2658         old_resp = resp
2659         # gzip
2660         if resp.headers.get('Content-encoding', '') == 'gzip':
2661             content = resp.read()
2662             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2663             try:
2664                 uncompressed = io.BytesIO(gz.read())
2665             except IOError as original_ioerror:
2666                 # There may be junk add the end of the file
2667                 # See http://stackoverflow.com/q/4928560/35070 for details
2668                 for i in range(1, 1024):
2669                     try:
2670                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2671                         uncompressed = io.BytesIO(gz.read())
2672                     except IOError:
2673                         continue
2674                     break
2675                 else:
2676                     raise original_ioerror
2677             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2678             resp.msg = old_resp.msg
2679             del resp.headers['Content-encoding']
2680         # deflate
2681         if resp.headers.get('Content-encoding', '') == 'deflate':
2682             gz = io.BytesIO(self.deflate(resp.read()))
2683             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2684             resp.msg = old_resp.msg
2685             del resp.headers['Content-encoding']
2686         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2687         # https://github.com/ytdl-org/youtube-dl/issues/6457).
2688         if 300 <= resp.code < 400:
2689             location = resp.headers.get('Location')
2690             if location:
2691                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2692                 if sys.version_info >= (3, 0):
2693                     location = location.encode('iso-8859-1').decode('utf-8')
2694                 else:
2695                     location = location.decode('utf-8')
2696                 location_escaped = escape_url(location)
2697                 if location != location_escaped:
2698                     del resp.headers['Location']
2699                     if sys.version_info < (3, 0):
2700                         location_escaped = location_escaped.encode('utf-8')
2701                     resp.headers['Location'] = location_escaped
2702         return resp
2703
2704     https_request = http_request
2705     https_response = http_response
2706
2707
2708 def make_socks_conn_class(base_class, socks_proxy):
2709     assert issubclass(base_class, (
2710         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2711
2712     url_components = compat_urlparse.urlparse(socks_proxy)
2713     if url_components.scheme.lower() == 'socks5':
2714         socks_type = ProxyType.SOCKS5
2715     elif url_components.scheme.lower() in ('socks', 'socks4'):
2716         socks_type = ProxyType.SOCKS4
2717     elif url_components.scheme.lower() == 'socks4a':
2718         socks_type = ProxyType.SOCKS4A
2719
2720     def unquote_if_non_empty(s):
2721         if not s:
2722             return s
2723         return compat_urllib_parse_unquote_plus(s)
2724
2725     proxy_args = (
2726         socks_type,
2727         url_components.hostname, url_components.port or 1080,
2728         True,  # Remote DNS
2729         unquote_if_non_empty(url_components.username),
2730         unquote_if_non_empty(url_components.password),
2731     )
2732
2733     class SocksConnection(base_class):
2734         def connect(self):
2735             self.sock = sockssocket()
2736             self.sock.setproxy(*proxy_args)
2737             if type(self.timeout) in (int, float):
2738                 self.sock.settimeout(self.timeout)
2739             self.sock.connect((self.host, self.port))
2740
2741             if isinstance(self, compat_http_client.HTTPSConnection):
2742                 if hasattr(self, '_context'):  # Python > 2.6
2743                     self.sock = self._context.wrap_socket(
2744                         self.sock, server_hostname=self.host)
2745                 else:
2746                     self.sock = ssl.wrap_socket(self.sock)
2747
2748     return SocksConnection
2749
2750
2751 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2752     def __init__(self, params, https_conn_class=None, *args, **kwargs):
2753         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2754         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2755         self._params = params
2756
2757     def https_open(self, req):
2758         kwargs = {}
2759         conn_class = self._https_conn_class
2760
2761         if hasattr(self, '_context'):  # python > 2.6
2762             kwargs['context'] = self._context
2763         if hasattr(self, '_check_hostname'):  # python 3.x
2764             kwargs['check_hostname'] = self._check_hostname
2765
2766         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2767         if socks_proxy:
2768             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2769             del req.headers['Ytdl-socks-proxy']
2770
2771         return self.do_open(functools.partial(
2772             _create_http_connection, self, conn_class, True),
2773             req, **kwargs)
2774
2775
2776 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
2777     """
2778     See [1] for cookie file format.
2779
2780     1. https://curl.haxx.se/docs/http-cookies.html
2781     """
2782     _HTTPONLY_PREFIX = '#HttpOnly_'
2783     _ENTRY_LEN = 7
2784     _HEADER = '''# Netscape HTTP Cookie File
2785 # This file is generated by yt-dlp.  Do not edit.
2786
2787 '''
2788     _CookieFileEntry = collections.namedtuple(
2789         'CookieFileEntry',
2790         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2791
2792     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
2793         """
2794         Save cookies to a file.
2795
2796         Most of the code is taken from CPython 3.8 and slightly adapted
2797         to support cookie files with UTF-8 in both python 2 and 3.
2798         """
2799         if filename is None:
2800             if self.filename is not None:
2801                 filename = self.filename
2802             else:
2803                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2804
2805         # Store session cookies with `expires` set to 0 instead of an empty
2806         # string
2807         for cookie in self:
2808             if cookie.expires is None:
2809                 cookie.expires = 0
2810
2811         with io.open(filename, 'w', encoding='utf-8') as f:
2812             f.write(self._HEADER)
2813             now = time.time()
2814             for cookie in self:
2815                 if not ignore_discard and cookie.discard:
2816                     continue
2817                 if not ignore_expires and cookie.is_expired(now):
2818                     continue
2819                 if cookie.secure:
2820                     secure = 'TRUE'
2821                 else:
2822                     secure = 'FALSE'
2823                 if cookie.domain.startswith('.'):
2824                     initial_dot = 'TRUE'
2825                 else:
2826                     initial_dot = 'FALSE'
2827                 if cookie.expires is not None:
2828                     expires = compat_str(cookie.expires)
2829                 else:
2830                     expires = ''
2831                 if cookie.value is None:
2832                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
2833                     # with no name, whereas http.cookiejar regards it as a
2834                     # cookie with no value.
2835                     name = ''
2836                     value = cookie.name
2837                 else:
2838                     name = cookie.name
2839                     value = cookie.value
2840                 f.write(
2841                     '\t'.join([cookie.domain, initial_dot, cookie.path,
2842                                secure, expires, name, value]) + '\n')
2843
2844     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
2845         """Load cookies from a file."""
2846         if filename is None:
2847             if self.filename is not None:
2848                 filename = self.filename
2849             else:
2850                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2851
2852         def prepare_line(line):
2853             if line.startswith(self._HTTPONLY_PREFIX):
2854                 line = line[len(self._HTTPONLY_PREFIX):]
2855             # comments and empty lines are fine
2856             if line.startswith('#') or not line.strip():
2857                 return line
2858             cookie_list = line.split('\t')
2859             if len(cookie_list) != self._ENTRY_LEN:
2860                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2861             cookie = self._CookieFileEntry(*cookie_list)
2862             if cookie.expires_at and not cookie.expires_at.isdigit():
2863                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2864             return line
2865
2866         cf = io.StringIO()
2867         with io.open(filename, encoding='utf-8') as f:
2868             for line in f:
2869                 try:
2870                     cf.write(prepare_line(line))
2871                 except compat_cookiejar.LoadError as e:
2872                     write_string(
2873                         'WARNING: skipping cookie file entry due to %s: %r\n'
2874                         % (e, line), sys.stderr)
2875                     continue
2876         cf.seek(0)
2877         self._really_load(cf, filename, ignore_discard, ignore_expires)
2878         # Session cookies are denoted by either `expires` field set to
2879         # an empty string or 0. MozillaCookieJar only recognizes the former
2880         # (see [1]). So we need force the latter to be recognized as session
2881         # cookies on our own.
2882         # Session cookies may be important for cookies-based authentication,
2883         # e.g. usually, when user does not check 'Remember me' check box while
2884         # logging in on a site, some important cookies are stored as session
2885         # cookies so that not recognizing them will result in failed login.
2886         # 1. https://bugs.python.org/issue17164
2887         for cookie in self:
2888             # Treat `expires=0` cookies as session cookies
2889             if cookie.expires == 0:
2890                 cookie.expires = None
2891                 cookie.discard = True
2892
2893
2894 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2895     def __init__(self, cookiejar=None):
2896         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2897
2898     def http_response(self, request, response):
2899         # Python 2 will choke on next HTTP request in row if there are non-ASCII
2900         # characters in Set-Cookie HTTP header of last response (see
2901         # https://github.com/ytdl-org/youtube-dl/issues/6769).
2902         # In order to at least prevent crashing we will percent encode Set-Cookie
2903         # header before HTTPCookieProcessor starts processing it.
2904         # if sys.version_info < (3, 0) and response.headers:
2905         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2906         #         set_cookie = response.headers.get(set_cookie_header)
2907         #         if set_cookie:
2908         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2909         #             if set_cookie != set_cookie_escaped:
2910         #                 del response.headers[set_cookie_header]
2911         #                 response.headers[set_cookie_header] = set_cookie_escaped
2912         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2913
2914     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
2915     https_response = http_response
2916
2917
2918 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
2919     if sys.version_info[0] < 3:
2920         def redirect_request(self, req, fp, code, msg, headers, newurl):
2921             # On python 2 urlh.geturl() may sometimes return redirect URL
2922             # as byte string instead of unicode. This workaround allows
2923             # to force it always return unicode.
2924             return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2925
2926
2927 def extract_timezone(date_str):
2928     m = re.search(
2929         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2930         date_str)
2931     if not m:
2932         timezone = datetime.timedelta()
2933     else:
2934         date_str = date_str[:-len(m.group('tz'))]
2935         if not m.group('sign'):
2936             timezone = datetime.timedelta()
2937         else:
2938             sign = 1 if m.group('sign') == '+' else -1
2939             timezone = datetime.timedelta(
2940                 hours=sign * int(m.group('hours')),
2941                 minutes=sign * int(m.group('minutes')))
2942     return timezone, date_str
2943
2944
2945 def parse_iso8601(date_str, delimiter='T', timezone=None):
2946     """ Return a UNIX timestamp from the given date """
2947
2948     if date_str is None:
2949         return None
2950
2951     date_str = re.sub(r'\.[0-9]+', '', date_str)
2952
2953     if timezone is None:
2954         timezone, date_str = extract_timezone(date_str)
2955
2956     try:
2957         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
2958         dt = datetime.datetime.strptime(date_str, date_format) - timezone
2959         return calendar.timegm(dt.timetuple())
2960     except ValueError:
2961         pass
2962
2963
2964 def date_formats(day_first=True):
2965     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
2966
2967
2968 def unified_strdate(date_str, day_first=True):
2969     """Return a string with the date in the format YYYYMMDD"""
2970
2971     if date_str is None:
2972         return None
2973     upload_date = None
2974     # Replace commas
2975     date_str = date_str.replace(',', ' ')
2976     # Remove AM/PM + timezone
2977     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
2978     _, date_str = extract_timezone(date_str)
2979
2980     for expression in date_formats(day_first):
2981         try:
2982             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
2983         except ValueError:
2984             pass
2985     if upload_date is None:
2986         timetuple = email.utils.parsedate_tz(date_str)
2987         if timetuple:
2988             try:
2989                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
2990             except ValueError:
2991                 pass
2992     if upload_date is not None:
2993         return compat_str(upload_date)
2994
2995
2996 def unified_timestamp(date_str, day_first=True):
2997     if date_str is None:
2998         return None
2999
3000     date_str = re.sub(r'[,|]', '', date_str)
3001
3002     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
3003     timezone, date_str = extract_timezone(date_str)
3004
3005     # Remove AM/PM + timezone
3006     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3007
3008     # Remove unrecognized timezones from ISO 8601 alike timestamps
3009     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3010     if m:
3011         date_str = date_str[:-len(m.group('tz'))]
3012
3013     # Python only supports microseconds, so remove nanoseconds
3014     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3015     if m:
3016         date_str = m.group(1)
3017
3018     for expression in date_formats(day_first):
3019         try:
3020             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
3021             return calendar.timegm(dt.timetuple())
3022         except ValueError:
3023             pass
3024     timetuple = email.utils.parsedate_tz(date_str)
3025     if timetuple:
3026         return calendar.timegm(timetuple) + pm_delta * 3600
3027
3028
3029 def determine_ext(url, default_ext='unknown_video'):
3030     if url is None or '.' not in url:
3031         return default_ext
3032     guess = url.partition('?')[0].rpartition('.')[2]
3033     if re.match(r'^[A-Za-z0-9]+$', guess):
3034         return guess
3035     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3036     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
3037         return guess.rstrip('/')
3038     else:
3039         return default_ext
3040
3041
3042 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3043     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3044
3045
3046 def date_from_str(date_str):
3047     """
3048     Return a datetime object from a string in the format YYYYMMDD or
3049     (now|today)[+-][0-9](day|week|month|year)(s)?"""
3050     today = datetime.date.today()
3051     if date_str in ('now', 'today'):
3052         return today
3053     if date_str == 'yesterday':
3054         return today - datetime.timedelta(days=1)
3055     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
3056     if match is not None:
3057         sign = match.group('sign')
3058         time = int(match.group('time'))
3059         if sign == '-':
3060             time = -time
3061         unit = match.group('unit')
3062         # A bad approximation?
3063         if unit == 'month':
3064             unit = 'day'
3065             time *= 30
3066         elif unit == 'year':
3067             unit = 'day'
3068             time *= 365
3069         unit += 's'
3070         delta = datetime.timedelta(**{unit: time})
3071         return today + delta
3072     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
3073
3074
3075 def hyphenate_date(date_str):
3076     """
3077     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3078     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3079     if match is not None:
3080         return '-'.join(match.groups())
3081     else:
3082         return date_str
3083
3084
3085 class DateRange(object):
3086     """Represents a time interval between two dates"""
3087
3088     def __init__(self, start=None, end=None):
3089         """start and end must be strings in the format accepted by date"""
3090         if start is not None:
3091             self.start = date_from_str(start)
3092         else:
3093             self.start = datetime.datetime.min.date()
3094         if end is not None:
3095             self.end = date_from_str(end)
3096         else:
3097             self.end = datetime.datetime.max.date()
3098         if self.start > self.end:
3099             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
3100
3101     @classmethod
3102     def day(cls, day):
3103         """Returns a range that only contains the given day"""
3104         return cls(day, day)
3105
3106     def __contains__(self, date):
3107         """Check if the date is in the range"""
3108         if not isinstance(date, datetime.date):
3109             date = date_from_str(date)
3110         return self.start <= date <= self.end
3111
3112     def __str__(self):
3113         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3114
3115
3116 def platform_name():
3117     """ Returns the platform name as a compat_str """
3118     res = platform.platform()
3119     if isinstance(res, bytes):
3120         res = res.decode(preferredencoding())
3121
3122     assert isinstance(res, compat_str)
3123     return res
3124
3125
3126 def _windows_write_string(s, out):
3127     """ Returns True if the string was written using special methods,
3128     False if it has yet to be written out."""
3129     # Adapted from http://stackoverflow.com/a/3259271/35070
3130
3131     import ctypes
3132     import ctypes.wintypes
3133
3134     WIN_OUTPUT_IDS = {
3135         1: -11,
3136         2: -12,
3137     }
3138
3139     try:
3140         fileno = out.fileno()
3141     except AttributeError:
3142         # If the output stream doesn't have a fileno, it's virtual
3143         return False
3144     except io.UnsupportedOperation:
3145         # Some strange Windows pseudo files?
3146         return False
3147     if fileno not in WIN_OUTPUT_IDS:
3148         return False
3149
3150     GetStdHandle = compat_ctypes_WINFUNCTYPE(
3151         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
3152         ('GetStdHandle', ctypes.windll.kernel32))
3153     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3154
3155     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
3156         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3157         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
3158         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
3159     written = ctypes.wintypes.DWORD(0)
3160
3161     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
3162     FILE_TYPE_CHAR = 0x0002
3163     FILE_TYPE_REMOTE = 0x8000
3164     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
3165         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3166         ctypes.POINTER(ctypes.wintypes.DWORD))(
3167         ('GetConsoleMode', ctypes.windll.kernel32))
3168     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3169
3170     def not_a_console(handle):
3171         if handle == INVALID_HANDLE_VALUE or handle is None:
3172             return True
3173         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3174                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
3175
3176     if not_a_console(h):
3177         return False
3178
3179     def next_nonbmp_pos(s):
3180         try:
3181             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3182         except StopIteration:
3183             return len(s)
3184
3185     while s:
3186         count = min(next_nonbmp_pos(s), 1024)
3187
3188         ret = WriteConsoleW(
3189             h, s, count if count else 2, ctypes.byref(written), None)
3190         if ret == 0:
3191             raise OSError('Failed to write string')
3192         if not count:  # We just wrote a non-BMP character
3193             assert written.value == 2
3194             s = s[1:]
3195         else:
3196             assert written.value > 0
3197             s = s[written.value:]
3198     return True
3199
3200
3201 def write_string(s, out=None, encoding=None):
3202     if out is None:
3203         out = sys.stderr
3204     assert type(s) == compat_str
3205
3206     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3207         if _windows_write_string(s, out):
3208             return
3209
3210     if ('b' in getattr(out, 'mode', '')
3211             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
3212         byt = s.encode(encoding or preferredencoding(), 'ignore')
3213         out.write(byt)
3214     elif hasattr(out, 'buffer'):
3215         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3216         byt = s.encode(enc, 'ignore')
3217         out.buffer.write(byt)
3218     else:
3219         out.write(s)
3220     out.flush()
3221
3222
3223 def bytes_to_intlist(bs):
3224     if not bs:
3225         return []
3226     if isinstance(bs[0], int):  # Python 3
3227         return list(bs)
3228     else:
3229         return [ord(c) for c in bs]
3230
3231
3232 def intlist_to_bytes(xs):
3233     if not xs:
3234         return b''
3235     return compat_struct_pack('%dB' % len(xs), *xs)
3236
3237
3238 # Cross-platform file locking
3239 if sys.platform == 'win32':
3240     import ctypes.wintypes
3241     import msvcrt
3242
3243     class OVERLAPPED(ctypes.Structure):
3244         _fields_ = [
3245             ('Internal', ctypes.wintypes.LPVOID),
3246             ('InternalHigh', ctypes.wintypes.LPVOID),
3247             ('Offset', ctypes.wintypes.DWORD),
3248             ('OffsetHigh', ctypes.wintypes.DWORD),
3249             ('hEvent', ctypes.wintypes.HANDLE),
3250         ]
3251
3252     kernel32 = ctypes.windll.kernel32
3253     LockFileEx = kernel32.LockFileEx
3254     LockFileEx.argtypes = [
3255         ctypes.wintypes.HANDLE,     # hFile
3256         ctypes.wintypes.DWORD,      # dwFlags
3257         ctypes.wintypes.DWORD,      # dwReserved
3258         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3259         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3260         ctypes.POINTER(OVERLAPPED)  # Overlapped
3261     ]
3262     LockFileEx.restype = ctypes.wintypes.BOOL
3263     UnlockFileEx = kernel32.UnlockFileEx
3264     UnlockFileEx.argtypes = [
3265         ctypes.wintypes.HANDLE,     # hFile
3266         ctypes.wintypes.DWORD,      # dwReserved
3267         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3268         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3269         ctypes.POINTER(OVERLAPPED)  # Overlapped
3270     ]
3271     UnlockFileEx.restype = ctypes.wintypes.BOOL
3272     whole_low = 0xffffffff
3273     whole_high = 0x7fffffff
3274
3275     def _lock_file(f, exclusive):
3276         overlapped = OVERLAPPED()
3277         overlapped.Offset = 0
3278         overlapped.OffsetHigh = 0
3279         overlapped.hEvent = 0
3280         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3281         handle = msvcrt.get_osfhandle(f.fileno())
3282         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3283                           whole_low, whole_high, f._lock_file_overlapped_p):
3284             raise OSError('Locking file failed: %r' % ctypes.FormatError())
3285
3286     def _unlock_file(f):
3287         assert f._lock_file_overlapped_p
3288         handle = msvcrt.get_osfhandle(f.fileno())
3289         if not UnlockFileEx(handle, 0,
3290                             whole_low, whole_high, f._lock_file_overlapped_p):
3291             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3292
3293 else:
3294     # Some platforms, such as Jython, is missing fcntl
3295     try:
3296         import fcntl
3297
3298         def _lock_file(f, exclusive):
3299             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
3300
3301         def _unlock_file(f):
3302             fcntl.flock(f, fcntl.LOCK_UN)
3303     except ImportError:
3304         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3305
3306         def _lock_file(f, exclusive):
3307             raise IOError(UNSUPPORTED_MSG)
3308
3309         def _unlock_file(f):
3310             raise IOError(UNSUPPORTED_MSG)
3311
3312
3313 class locked_file(object):
3314     def __init__(self, filename, mode, encoding=None):
3315         assert mode in ['r', 'a', 'w']
3316         self.f = io.open(filename, mode, encoding=encoding)
3317         self.mode = mode
3318
3319     def __enter__(self):
3320         exclusive = self.mode != 'r'
3321         try:
3322             _lock_file(self.f, exclusive)
3323         except IOError:
3324             self.f.close()
3325             raise
3326         return self
3327
3328     def __exit__(self, etype, value, traceback):
3329         try:
3330             _unlock_file(self.f)
3331         finally:
3332             self.f.close()
3333
3334     def __iter__(self):
3335         return iter(self.f)
3336
3337     def write(self, *args):
3338         return self.f.write(*args)
3339
3340     def read(self, *args):
3341         return self.f.read(*args)
3342
3343
3344 def get_filesystem_encoding():
3345     encoding = sys.getfilesystemencoding()
3346     return encoding if encoding is not None else 'utf-8'
3347
3348
3349 def shell_quote(args):
3350     quoted_args = []
3351     encoding = get_filesystem_encoding()
3352     for a in args:
3353         if isinstance(a, bytes):
3354             # We may get a filename encoded with 'encodeFilename'
3355             a = a.decode(encoding)
3356         quoted_args.append(compat_shlex_quote(a))
3357     return ' '.join(quoted_args)
3358
3359
3360 def smuggle_url(url, data):
3361     """ Pass additional data in a URL for internal use. """
3362
3363     url, idata = unsmuggle_url(url, {})
3364     data.update(idata)
3365     sdata = compat_urllib_parse_urlencode(
3366         {'__youtubedl_smuggle': json.dumps(data)})
3367     return url + '#' + sdata
3368
3369
3370 def unsmuggle_url(smug_url, default=None):
3371     if '#__youtubedl_smuggle' not in smug_url:
3372         return smug_url, default
3373     url, _, sdata = smug_url.rpartition('#')
3374     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
3375     data = json.loads(jsond)
3376     return url, data
3377
3378
3379 def format_bytes(bytes):
3380     if bytes is None:
3381         return 'N/A'
3382     if type(bytes) is str:
3383         bytes = float(bytes)
3384     if bytes == 0.0:
3385         exponent = 0
3386     else:
3387         exponent = int(math.log(bytes, 1024.0))
3388     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
3389     converted = float(bytes) / float(1024 ** exponent)
3390     return '%.2f%s' % (converted, suffix)
3391
3392
3393 def lookup_unit_table(unit_table, s):
3394     units_re = '|'.join(re.escape(u) for u in unit_table)
3395     m = re.match(
3396         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3397     if not m:
3398         return None
3399     num_str = m.group('num').replace(',', '.')
3400     mult = unit_table[m.group('unit')]
3401     return int(float(num_str) * mult)
3402
3403
3404 def parse_filesize(s):
3405     if s is None:
3406         return None
3407
3408     # The lower-case forms are of course incorrect and unofficial,
3409     # but we support those too
3410     _UNIT_TABLE = {
3411         'B': 1,
3412         'b': 1,
3413         'bytes': 1,
3414         'KiB': 1024,
3415         'KB': 1000,
3416         'kB': 1024,
3417         'Kb': 1000,
3418         'kb': 1000,
3419         'kilobytes': 1000,
3420         'kibibytes': 1024,
3421         'MiB': 1024 ** 2,
3422         'MB': 1000 ** 2,
3423         'mB': 1024 ** 2,
3424         'Mb': 1000 ** 2,
3425         'mb': 1000 ** 2,
3426         'megabytes': 1000 ** 2,
3427         'mebibytes': 1024 ** 2,
3428         'GiB': 1024 ** 3,
3429         'GB': 1000 ** 3,
3430         'gB': 1024 ** 3,
3431         'Gb': 1000 ** 3,
3432         'gb': 1000 ** 3,
3433         'gigabytes': 1000 ** 3,
3434         'gibibytes': 1024 ** 3,
3435         'TiB': 1024 ** 4,
3436         'TB': 1000 ** 4,
3437         'tB': 1024 ** 4,
3438         'Tb': 1000 ** 4,
3439         'tb': 1000 ** 4,
3440         'terabytes': 1000 ** 4,
3441         'tebibytes': 1024 ** 4,
3442         'PiB': 1024 ** 5,
3443         'PB': 1000 ** 5,
3444         'pB': 1024 ** 5,
3445         'Pb': 1000 ** 5,
3446         'pb': 1000 ** 5,
3447         'petabytes': 1000 ** 5,
3448         'pebibytes': 1024 ** 5,
3449         'EiB': 1024 ** 6,
3450         'EB': 1000 ** 6,
3451         'eB': 1024 ** 6,
3452         'Eb': 1000 ** 6,
3453         'eb': 1000 ** 6,
3454         'exabytes': 1000 ** 6,
3455         'exbibytes': 1024 ** 6,
3456         'ZiB': 1024 ** 7,
3457         'ZB': 1000 ** 7,
3458         'zB': 1024 ** 7,
3459         'Zb': 1000 ** 7,
3460         'zb': 1000 ** 7,
3461         'zettabytes': 1000 ** 7,
3462         'zebibytes': 1024 ** 7,
3463         'YiB': 1024 ** 8,
3464         'YB': 1000 ** 8,
3465         'yB': 1024 ** 8,
3466         'Yb': 1000 ** 8,
3467         'yb': 1000 ** 8,
3468         'yottabytes': 1000 ** 8,
3469         'yobibytes': 1024 ** 8,
3470     }
3471
3472     return lookup_unit_table(_UNIT_TABLE, s)
3473
3474
3475 def parse_count(s):
3476     if s is None:
3477         return None
3478
3479     s = s.strip()
3480
3481     if re.match(r'^[\d,.]+$', s):
3482         return str_to_int(s)
3483
3484     _UNIT_TABLE = {
3485         'k': 1000,
3486         'K': 1000,
3487         'm': 1000 ** 2,
3488         'M': 1000 ** 2,
3489         'kk': 1000 ** 2,
3490         'KK': 1000 ** 2,
3491     }
3492
3493     return lookup_unit_table(_UNIT_TABLE, s)
3494
3495
3496 def parse_resolution(s):
3497     if s is None:
3498         return {}
3499
3500     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3501     if mobj:
3502         return {
3503             'width': int(mobj.group('w')),
3504             'height': int(mobj.group('h')),
3505         }
3506
3507     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3508     if mobj:
3509         return {'height': int(mobj.group(1))}
3510
3511     mobj = re.search(r'\b([48])[kK]\b', s)
3512     if mobj:
3513         return {'height': int(mobj.group(1)) * 540}
3514
3515     return {}
3516
3517
3518 def parse_bitrate(s):
3519     if not isinstance(s, compat_str):
3520         return
3521     mobj = re.search(r'\b(\d+)\s*kbps', s)
3522     if mobj:
3523         return int(mobj.group(1))
3524
3525
3526 def month_by_name(name, lang='en'):
3527     """ Return the number of a month by (locale-independently) English name """
3528
3529     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3530
3531     try:
3532         return month_names.index(name) + 1
3533     except ValueError:
3534         return None
3535
3536
3537 def month_by_abbreviation(abbrev):
3538     """ Return the number of a month by (locale-independently) English
3539         abbreviations """
3540
3541     try:
3542         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3543     except ValueError:
3544         return None
3545
3546
3547 def fix_xml_ampersands(xml_str):
3548     """Replace all the '&' by '&amp;' in XML"""
3549     return re.sub(
3550         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3551         '&amp;',
3552         xml_str)
3553
3554
3555 def setproctitle(title):
3556     assert isinstance(title, compat_str)
3557
3558     # ctypes in Jython is not complete
3559     # http://bugs.jython.org/issue2148
3560     if sys.platform.startswith('java'):
3561         return
3562
3563     try:
3564         libc = ctypes.cdll.LoadLibrary('libc.so.6')
3565     except OSError:
3566         return
3567     except TypeError:
3568         # LoadLibrary in Windows Python 2.7.13 only expects
3569         # a bytestring, but since unicode_literals turns
3570         # every string into a unicode string, it fails.
3571         return
3572     title_bytes = title.encode('utf-8')
3573     buf = ctypes.create_string_buffer(len(title_bytes))
3574     buf.value = title_bytes
3575     try:
3576         libc.prctl(15, buf, 0, 0, 0)
3577     except AttributeError:
3578         return  # Strange libc, just skip this
3579
3580
3581 def remove_start(s, start):
3582     return s[len(start):] if s is not None and s.startswith(start) else s
3583
3584
3585 def remove_end(s, end):
3586     return s[:-len(end)] if s is not None and s.endswith(end) else s
3587
3588
3589 def remove_quotes(s):
3590     if s is None or len(s) < 2:
3591         return s
3592     for quote in ('"', "'", ):
3593         if s[0] == quote and s[-1] == quote:
3594             return s[1:-1]
3595     return s
3596
3597
3598 def get_domain(url):
3599     domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3600     return domain.group('domain') if domain else None
3601
3602
3603 def url_basename(url):
3604     path = compat_urlparse.urlparse(url).path
3605     return path.strip('/').split('/')[-1]
3606
3607
3608 def base_url(url):
3609     return re.match(r'https?://[^?#&]+/', url).group()
3610
3611
3612 def urljoin(base, path):
3613     if isinstance(path, bytes):
3614         path = path.decode('utf-8')
3615     if not isinstance(path, compat_str) or not path:
3616         return None
3617     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3618         return path
3619     if isinstance(base, bytes):
3620         base = base.decode('utf-8')
3621     if not isinstance(base, compat_str) or not re.match(
3622             r'^(?:https?:)?//', base):
3623         return None
3624     return compat_urlparse.urljoin(base, path)
3625
3626
3627 class HEADRequest(compat_urllib_request.Request):
3628     def get_method(self):
3629         return 'HEAD'
3630
3631
3632 class PUTRequest(compat_urllib_request.Request):
3633     def get_method(self):
3634         return 'PUT'
3635
3636
3637 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3638     if get_attr:
3639         if v is not None:
3640             v = getattr(v, get_attr, None)
3641     if v == '':
3642         v = None
3643     if v is None:
3644         return default
3645     try:
3646         return int(v) * invscale // scale
3647     except (ValueError, TypeError):
3648         return default
3649
3650
3651 def str_or_none(v, default=None):
3652     return default if v is None else compat_str(v)
3653
3654
3655 def str_to_int(int_str):
3656     """ A more relaxed version of int_or_none """
3657     if isinstance(int_str, compat_integer_types):
3658         return int_str
3659     elif isinstance(int_str, compat_str):
3660         int_str = re.sub(r'[,\.\+]', '', int_str)
3661         return int_or_none(int_str)
3662
3663
3664 def float_or_none(v, scale=1, invscale=1, default=None):
3665     if v is None:
3666         return default
3667     try:
3668         return float(v) * invscale / scale
3669     except (ValueError, TypeError):
3670         return default
3671
3672
3673 def bool_or_none(v, default=None):
3674     return v if isinstance(v, bool) else default
3675
3676
3677 def strip_or_none(v, default=None):
3678     return v.strip() if isinstance(v, compat_str) else default
3679
3680
3681 def url_or_none(url):
3682     if not url or not isinstance(url, compat_str):
3683         return None
3684     url = url.strip()
3685     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
3686
3687
3688 def strftime_or_none(timestamp, date_format, default=None):
3689     datetime_object = None
3690     try:
3691         if isinstance(timestamp, compat_numeric_types):  # unix timestamp
3692             datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3693         elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
3694             datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3695         return datetime_object.strftime(date_format)
3696     except (ValueError, TypeError, AttributeError):
3697         return default
3698
3699
3700 def parse_duration(s):
3701     if not isinstance(s, compat_basestring):
3702         return None
3703
3704     s = s.strip()
3705
3706     days, hours, mins, secs, ms = [None] * 5
3707     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3708     if m:
3709         days, hours, mins, secs, ms = m.groups()
3710     else:
3711         m = re.match(
3712             r'''(?ix)(?:P?
3713                 (?:
3714                     [0-9]+\s*y(?:ears?)?\s*
3715                 )?
3716                 (?:
3717                     [0-9]+\s*m(?:onths?)?\s*
3718                 )?
3719                 (?:
3720                     [0-9]+\s*w(?:eeks?)?\s*
3721                 )?
3722                 (?:
3723                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3724                 )?
3725                 T)?
3726                 (?:
3727                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3728                 )?
3729                 (?:
3730                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3731                 )?
3732                 (?:
3733                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3734                 )?Z?$''', s)
3735         if m:
3736             days, hours, mins, secs, ms = m.groups()
3737         else:
3738             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3739             if m:
3740                 hours, mins = m.groups()
3741             else:
3742                 return None
3743
3744     duration = 0
3745     if secs:
3746         duration += float(secs)
3747     if mins:
3748         duration += float(mins) * 60
3749     if hours:
3750         duration += float(hours) * 60 * 60
3751     if days:
3752         duration += float(days) * 24 * 60 * 60
3753     if ms:
3754         duration += float(ms)
3755     return duration
3756
3757
3758 def prepend_extension(filename, ext, expected_real_ext=None):
3759     name, real_ext = os.path.splitext(filename)
3760     return (
3761         '{0}.{1}{2}'.format(name, ext, real_ext)
3762         if not expected_real_ext or real_ext[1:] == expected_real_ext
3763         else '{0}.{1}'.format(filename, ext))
3764
3765
3766 def replace_extension(filename, ext, expected_real_ext=None):
3767     name, real_ext = os.path.splitext(filename)
3768     return '{0}.{1}'.format(
3769         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3770         ext)
3771
3772
3773 def check_executable(exe, args=[]):
3774     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3775     args can be a list of arguments for a short output (like -version) """
3776     try:
3777         process_communicate_or_kill(subprocess.Popen(
3778             [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
3779     except OSError:
3780         return False
3781     return exe
3782
3783
3784 def get_exe_version(exe, args=['--version'],
3785                     version_re=None, unrecognized='present'):
3786     """ Returns the version of the specified executable,
3787     or False if the executable is not present """
3788     try:
3789         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3790         # SIGTTOU if yt-dlp is run in the background.
3791         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3792         out, _ = process_communicate_or_kill(subprocess.Popen(
3793             [encodeArgument(exe)] + args,
3794             stdin=subprocess.PIPE,
3795             stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
3796     except OSError:
3797         return False
3798     if isinstance(out, bytes):  # Python 2.x
3799         out = out.decode('ascii', 'ignore')
3800     return detect_exe_version(out, version_re, unrecognized)
3801
3802
3803 def detect_exe_version(output, version_re=None, unrecognized='present'):
3804     assert isinstance(output, compat_str)
3805     if version_re is None:
3806         version_re = r'version\s+([-0-9._a-zA-Z]+)'
3807     m = re.search(version_re, output)
3808     if m:
3809         return m.group(1)
3810     else:
3811         return unrecognized
3812
3813
3814 class PagedList(object):
3815     def __len__(self):
3816         # This is only useful for tests
3817         return len(self.getslice())
3818
3819
3820 class OnDemandPagedList(PagedList):
3821     def __init__(self, pagefunc, pagesize, use_cache=True):
3822         self._pagefunc = pagefunc
3823         self._pagesize = pagesize
3824         self._use_cache = use_cache
3825         if use_cache:
3826             self._cache = {}
3827
3828     def getslice(self, start=0, end=None):
3829         res = []
3830         for pagenum in itertools.count(start // self._pagesize):
3831             firstid = pagenum * self._pagesize
3832             nextfirstid = pagenum * self._pagesize + self._pagesize
3833             if start >= nextfirstid:
3834                 continue
3835
3836             page_results = None
3837             if self._use_cache:
3838                 page_results = self._cache.get(pagenum)
3839             if page_results is None:
3840                 page_results = list(self._pagefunc(pagenum))
3841             if self._use_cache:
3842                 self._cache[pagenum] = page_results
3843
3844             startv = (
3845                 start % self._pagesize
3846                 if firstid <= start < nextfirstid
3847                 else 0)
3848
3849             endv = (
3850                 ((end - 1) % self._pagesize) + 1
3851                 if (end is not None and firstid <= end <= nextfirstid)
3852                 else None)
3853
3854             if startv != 0 or endv is not None:
3855                 page_results = page_results[startv:endv]
3856             res.extend(page_results)
3857
3858             # A little optimization - if current page is not "full", ie. does
3859             # not contain page_size videos then we can assume that this page
3860             # is the last one - there are no more ids on further pages -
3861             # i.e. no need to query again.
3862             if len(page_results) + startv < self._pagesize:
3863                 break
3864
3865             # If we got the whole page, but the next page is not interesting,
3866             # break out early as well
3867             if end == nextfirstid:
3868                 break
3869         return res
3870
3871
3872 class InAdvancePagedList(PagedList):
3873     def __init__(self, pagefunc, pagecount, pagesize):
3874         self._pagefunc = pagefunc
3875         self._pagecount = pagecount
3876         self._pagesize = pagesize
3877
3878     def getslice(self, start=0, end=None):
3879         res = []
3880         start_page = start // self._pagesize
3881         end_page = (
3882             self._pagecount if end is None else (end // self._pagesize + 1))
3883         skip_elems = start - start_page * self._pagesize
3884         only_more = None if end is None else end - start
3885         for pagenum in range(start_page, end_page):
3886             page = list(self._pagefunc(pagenum))
3887             if skip_elems:
3888                 page = page[skip_elems:]
3889                 skip_elems = None
3890             if only_more is not None:
3891                 if len(page) < only_more:
3892                     only_more -= len(page)
3893                 else:
3894                     page = page[:only_more]
3895                     res.extend(page)
3896                     break
3897             res.extend(page)
3898         return res
3899
3900
3901 def uppercase_escape(s):
3902     unicode_escape = codecs.getdecoder('unicode_escape')
3903     return re.sub(
3904         r'\\U[0-9a-fA-F]{8}',
3905         lambda m: unicode_escape(m.group(0))[0],
3906         s)
3907
3908
3909 def lowercase_escape(s):
3910     unicode_escape = codecs.getdecoder('unicode_escape')
3911     return re.sub(
3912         r'\\u[0-9a-fA-F]{4}',
3913         lambda m: unicode_escape(m.group(0))[0],
3914         s)
3915
3916
3917 def escape_rfc3986(s):
3918     """Escape non-ASCII characters as suggested by RFC 3986"""
3919     if sys.version_info < (3, 0) and isinstance(s, compat_str):
3920         s = s.encode('utf-8')
3921     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3922
3923
3924 def escape_url(url):
3925     """Escape URL as suggested by RFC 3986"""
3926     url_parsed = compat_urllib_parse_urlparse(url)
3927     return url_parsed._replace(
3928         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
3929         path=escape_rfc3986(url_parsed.path),
3930         params=escape_rfc3986(url_parsed.params),
3931         query=escape_rfc3986(url_parsed.query),
3932         fragment=escape_rfc3986(url_parsed.fragment)
3933     ).geturl()
3934
3935
3936 def read_batch_urls(batch_fd):
3937     def fixup(url):
3938         if not isinstance(url, compat_str):
3939             url = url.decode('utf-8', 'replace')
3940         BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
3941         for bom in BOM_UTF8:
3942             if url.startswith(bom):
3943                 url = url[len(bom):]
3944         url = url.lstrip()
3945         if not url or url.startswith(('#', ';', ']')):
3946             return False
3947         # "#" cannot be stripped out since it is part of the URI
3948         # However, it can be safely stipped out if follwing a whitespace
3949         return re.split(r'\s#', url, 1)[0].rstrip()
3950
3951     with contextlib.closing(batch_fd) as fd:
3952         return [url for url in map(fixup, fd) if url]
3953
3954
3955 def urlencode_postdata(*args, **kargs):
3956     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
3957
3958
3959 def update_url_query(url, query):
3960     if not query:
3961         return url
3962     parsed_url = compat_urlparse.urlparse(url)
3963     qs = compat_parse_qs(parsed_url.query)
3964     qs.update(query)
3965     return compat_urlparse.urlunparse(parsed_url._replace(
3966         query=compat_urllib_parse_urlencode(qs, True)))
3967
3968
3969 def update_Request(req, url=None, data=None, headers={}, query={}):
3970     req_headers = req.headers.copy()
3971     req_headers.update(headers)
3972     req_data = data or req.data
3973     req_url = update_url_query(url or req.get_full_url(), query)
3974     req_get_method = req.get_method()
3975     if req_get_method == 'HEAD':
3976         req_type = HEADRequest
3977     elif req_get_method == 'PUT':
3978         req_type = PUTRequest
3979     else:
3980         req_type = compat_urllib_request.Request
3981     new_req = req_type(
3982         req_url, data=req_data, headers=req_headers,
3983         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3984     if hasattr(req, 'timeout'):
3985         new_req.timeout = req.timeout
3986     return new_req
3987
3988
3989 def _multipart_encode_impl(data, boundary):
3990     content_type = 'multipart/form-data; boundary=%s' % boundary
3991
3992     out = b''
3993     for k, v in data.items():
3994         out += b'--' + boundary.encode('ascii') + b'\r\n'
3995         if isinstance(k, compat_str):
3996             k = k.encode('utf-8')
3997         if isinstance(v, compat_str):
3998             v = v.encode('utf-8')
3999         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4000         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
4001         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
4002         if boundary.encode('ascii') in content:
4003             raise ValueError('Boundary overlaps with data')
4004         out += content
4005
4006     out += b'--' + boundary.encode('ascii') + b'--\r\n'
4007
4008     return out, content_type
4009
4010
4011 def multipart_encode(data, boundary=None):
4012     '''
4013     Encode a dict to RFC 7578-compliant form-data
4014
4015     data:
4016         A dict where keys and values can be either Unicode or bytes-like
4017         objects.
4018     boundary:
4019         If specified a Unicode object, it's used as the boundary. Otherwise
4020         a random boundary is generated.
4021
4022     Reference: https://tools.ietf.org/html/rfc7578
4023     '''
4024     has_specified_boundary = boundary is not None
4025
4026     while True:
4027         if boundary is None:
4028             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4029
4030         try:
4031             out, content_type = _multipart_encode_impl(data, boundary)
4032             break
4033         except ValueError:
4034             if has_specified_boundary:
4035                 raise
4036             boundary = None
4037
4038     return out, content_type
4039
4040
4041 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
4042     if isinstance(key_or_keys, (list, tuple)):
4043         for key in key_or_keys:
4044             if key not in d or d[key] is None or skip_false_values and not d[key]:
4045                 continue
4046             return d[key]
4047         return default
4048     return d.get(key_or_keys, default)
4049
4050
4051 def try_get(src, getter, expected_type=None):
4052     if not isinstance(getter, (list, tuple)):
4053         getter = [getter]
4054     for get in getter:
4055         try:
4056             v = get(src)
4057         except (AttributeError, KeyError, TypeError, IndexError):
4058             pass
4059         else:
4060             if expected_type is None or isinstance(v, expected_type):
4061                 return v
4062
4063
4064 def merge_dicts(*dicts):
4065     merged = {}
4066     for a_dict in dicts:
4067         for k, v in a_dict.items():
4068             if v is None:
4069                 continue
4070             if (k not in merged
4071                     or (isinstance(v, compat_str) and v
4072                         and isinstance(merged[k], compat_str)
4073                         and not merged[k])):
4074                 merged[k] = v
4075     return merged
4076
4077
4078 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4079     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4080
4081
4082 US_RATINGS = {
4083     'G': 0,
4084     'PG': 10,
4085     'PG-13': 13,
4086     'R': 16,
4087     'NC': 18,
4088 }
4089
4090
4091 TV_PARENTAL_GUIDELINES = {
4092     'TV-Y': 0,
4093     'TV-Y7': 7,
4094     'TV-G': 0,
4095     'TV-PG': 0,
4096     'TV-14': 14,
4097     'TV-MA': 17,
4098 }
4099
4100
4101 def parse_age_limit(s):
4102     if type(s) == int:
4103         return s if 0 <= s <= 21 else None
4104     if not isinstance(s, compat_basestring):
4105         return None
4106     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4107     if m:
4108         return int(m.group('age'))
4109     if s in US_RATINGS:
4110         return US_RATINGS[s]
4111     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
4112     if m:
4113         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
4114     return None
4115
4116
4117 def strip_jsonp(code):
4118     return re.sub(
4119         r'''(?sx)^
4120             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4121             (?:\s*&&\s*(?P=func_name))?
4122             \s*\(\s*(?P<callback_data>.*)\);?
4123             \s*?(?://[^\n]*)*$''',
4124         r'\g<callback_data>', code)
4125
4126
4127 def js_to_json(code, vars={}):
4128     # vars is a dict of var, val pairs to substitute
4129     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4130     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4131     INTEGER_TABLE = (
4132         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4133         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4134     )
4135
4136     def fix_kv(m):
4137         v = m.group(0)
4138         if v in ('true', 'false', 'null'):
4139             return v
4140         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
4141             return ""
4142
4143         if v[0] in ("'", '"'):
4144             v = re.sub(r'(?s)\\.|"', lambda m: {
4145                 '"': '\\"',
4146                 "\\'": "'",
4147                 '\\\n': '',
4148                 '\\x': '\\u00',
4149             }.get(m.group(0), m.group(0)), v[1:-1])
4150         else:
4151             for regex, base in INTEGER_TABLE:
4152                 im = re.match(regex, v)
4153                 if im:
4154                     i = int(im.group(1), base)
4155                     return '"%d":' % i if v.endswith(':') else '%d' % i
4156
4157             if v in vars:
4158                 return vars[v]
4159
4160         return '"%s"' % v
4161
4162     return re.sub(r'''(?sx)
4163         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4164         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4165         {comment}|,(?={skip}[\]}}])|
4166         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4167         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4168         [0-9]+(?={skip}:)|
4169         !+
4170         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4171
4172
4173 def qualities(quality_ids):
4174     """ Get a numeric quality value out of a list of possible values """
4175     def q(qid):
4176         try:
4177             return quality_ids.index(qid)
4178         except ValueError:
4179             return -1
4180     return q
4181
4182
4183 DEFAULT_OUTTMPL = {
4184     'default': '%(title)s [%(id)s].%(ext)s',
4185     'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4186 }
4187 OUTTMPL_TYPES = {
4188     'chapter': None,
4189     'subtitle': None,
4190     'thumbnail': None,
4191     'description': 'description',
4192     'annotation': 'annotations.xml',
4193     'infojson': 'info.json',
4194     'pl_description': 'description',
4195     'pl_infojson': 'info.json',
4196 }
4197
4198
4199 def limit_length(s, length):
4200     """ Add ellipses to overly long strings """
4201     if s is None:
4202         return None
4203     ELLIPSES = '...'
4204     if len(s) > length:
4205         return s[:length - len(ELLIPSES)] + ELLIPSES
4206     return s
4207
4208
4209 def version_tuple(v):
4210     return tuple(int(e) for e in re.split(r'[-.]', v))
4211
4212
4213 def is_outdated_version(version, limit, assume_new=True):
4214     if not version:
4215         return not assume_new
4216     try:
4217         return version_tuple(version) < version_tuple(limit)
4218     except ValueError:
4219         return not assume_new
4220
4221
4222 def ytdl_is_updateable():
4223     """ Returns if yt-dlp can be updated with -U """
4224     return False
4225
4226     from zipimport import zipimporter
4227
4228     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4229
4230
4231 def args_to_str(args):
4232     # Get a short string representation for a subprocess command
4233     return ' '.join(compat_shlex_quote(a) for a in args)
4234
4235
4236 def error_to_compat_str(err):
4237     err_str = str(err)
4238     # On python 2 error byte string must be decoded with proper
4239     # encoding rather than ascii
4240     if sys.version_info[0] < 3:
4241         err_str = err_str.decode(preferredencoding())
4242     return err_str
4243
4244
4245 def mimetype2ext(mt):
4246     if mt is None:
4247         return None
4248
4249     ext = {
4250         'audio/mp4': 'm4a',
4251         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4252         # it's the most popular one
4253         'audio/mpeg': 'mp3',
4254         'audio/x-wav': 'wav',
4255     }.get(mt)
4256     if ext is not None:
4257         return ext
4258
4259     _, _, res = mt.rpartition('/')
4260     res = res.split(';')[0].strip().lower()
4261
4262     return {
4263         '3gpp': '3gp',
4264         'smptett+xml': 'tt',
4265         'ttaf+xml': 'dfxp',
4266         'ttml+xml': 'ttml',
4267         'x-flv': 'flv',
4268         'x-mp4-fragmented': 'mp4',
4269         'x-ms-sami': 'sami',
4270         'x-ms-wmv': 'wmv',
4271         'mpegurl': 'm3u8',
4272         'x-mpegurl': 'm3u8',
4273         'vnd.apple.mpegurl': 'm3u8',
4274         'dash+xml': 'mpd',
4275         'f4m+xml': 'f4m',
4276         'hds+xml': 'f4m',
4277         'vnd.ms-sstr+xml': 'ism',
4278         'quicktime': 'mov',
4279         'mp2t': 'ts',
4280         'x-wav': 'wav',
4281     }.get(res, res)
4282
4283
4284 def parse_codecs(codecs_str):
4285     # http://tools.ietf.org/html/rfc6381
4286     if not codecs_str:
4287         return {}
4288     split_codecs = list(filter(None, map(
4289         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4290     vcodec, acodec = None, None
4291     for full_codec in split_codecs:
4292         codec = full_codec.split('.')[0]
4293         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4294             if not vcodec:
4295                 vcodec = full_codec
4296         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4297             if not acodec:
4298                 acodec = full_codec
4299         else:
4300             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4301     if not vcodec and not acodec:
4302         if len(split_codecs) == 2:
4303             return {
4304                 'vcodec': split_codecs[0],
4305                 'acodec': split_codecs[1],
4306             }
4307     else:
4308         return {
4309             'vcodec': vcodec or 'none',
4310             'acodec': acodec or 'none',
4311         }
4312     return {}
4313
4314
4315 def urlhandle_detect_ext(url_handle):
4316     getheader = url_handle.headers.get
4317
4318     cd = getheader('Content-Disposition')
4319     if cd:
4320         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4321         if m:
4322             e = determine_ext(m.group('filename'), default_ext=None)
4323             if e:
4324                 return e
4325
4326     return mimetype2ext(getheader('Content-Type'))
4327
4328
4329 def encode_data_uri(data, mime_type):
4330     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4331
4332
4333 def age_restricted(content_limit, age_limit):
4334     """ Returns True iff the content should be blocked """
4335
4336     if age_limit is None:  # No limit set
4337         return False
4338     if content_limit is None:
4339         return False  # Content available for everyone
4340     return age_limit < content_limit
4341
4342
4343 def is_html(first_bytes):
4344     """ Detect whether a file contains HTML by examining its first bytes. """
4345
4346     BOMS = [
4347         (b'\xef\xbb\xbf', 'utf-8'),
4348         (b'\x00\x00\xfe\xff', 'utf-32-be'),
4349         (b'\xff\xfe\x00\x00', 'utf-32-le'),
4350         (b'\xff\xfe', 'utf-16-le'),
4351         (b'\xfe\xff', 'utf-16-be'),
4352     ]
4353     for bom, enc in BOMS:
4354         if first_bytes.startswith(bom):
4355             s = first_bytes[len(bom):].decode(enc, 'replace')
4356             break
4357     else:
4358         s = first_bytes.decode('utf-8', 'replace')
4359
4360     return re.match(r'^\s*<', s)
4361
4362
4363 def determine_protocol(info_dict):
4364     protocol = info_dict.get('protocol')
4365     if protocol is not None:
4366         return protocol
4367
4368     url = info_dict['url']
4369     if url.startswith('rtmp'):
4370         return 'rtmp'
4371     elif url.startswith('mms'):
4372         return 'mms'
4373     elif url.startswith('rtsp'):
4374         return 'rtsp'
4375
4376     ext = determine_ext(url)
4377     if ext == 'm3u8':
4378         return 'm3u8'
4379     elif ext == 'f4m':
4380         return 'f4m'
4381
4382     return compat_urllib_parse_urlparse(url).scheme
4383
4384
4385 def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
4386     """ Render a list of rows, each as a list of values """
4387
4388     def get_max_lens(table):
4389         return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4390
4391     def filter_using_list(row, filterArray):
4392         return [col for (take, col) in zip(filterArray, row) if take]
4393
4394     if hideEmpty:
4395         max_lens = get_max_lens(data)
4396         header_row = filter_using_list(header_row, max_lens)
4397         data = [filter_using_list(row, max_lens) for row in data]
4398
4399     table = [header_row] + data
4400     max_lens = get_max_lens(table)
4401     if delim:
4402         table = [header_row] + [['-' * ml for ml in max_lens]] + data
4403     format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
4404     return '\n'.join(format_str % tuple(row) for row in table)
4405
4406
4407 def _match_one(filter_part, dct):
4408     COMPARISON_OPERATORS = {
4409         '<': operator.lt,
4410         '<=': operator.le,
4411         '>': operator.gt,
4412         '>=': operator.ge,
4413         '=': operator.eq,
4414         '!=': operator.ne,
4415     }
4416     operator_rex = re.compile(r'''(?x)\s*
4417         (?P<key>[a-z_]+)
4418         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4419         (?:
4420             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4421             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4422             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4423         )
4424         \s*$
4425         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4426     m = operator_rex.search(filter_part)
4427     if m:
4428         op = COMPARISON_OPERATORS[m.group('op')]
4429         actual_value = dct.get(m.group('key'))
4430         if (m.group('quotedstrval') is not None
4431             or m.group('strval') is not None
4432             # If the original field is a string and matching comparisonvalue is
4433             # a number we should respect the origin of the original field
4434             # and process comparison value as a string (see
4435             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4436             or actual_value is not None and m.group('intval') is not None
4437                 and isinstance(actual_value, compat_str)):
4438             if m.group('op') not in ('=', '!='):
4439                 raise ValueError(
4440                     'Operator %s does not support string values!' % m.group('op'))
4441             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4442             quote = m.group('quote')
4443             if quote is not None:
4444                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4445         else:
4446             try:
4447                 comparison_value = int(m.group('intval'))
4448             except ValueError:
4449                 comparison_value = parse_filesize(m.group('intval'))
4450                 if comparison_value is None:
4451                     comparison_value = parse_filesize(m.group('intval') + 'B')
4452                 if comparison_value is None:
4453                     raise ValueError(
4454                         'Invalid integer value %r in filter part %r' % (
4455                             m.group('intval'), filter_part))
4456         if actual_value is None:
4457             return m.group('none_inclusive')
4458         return op(actual_value, comparison_value)
4459
4460     UNARY_OPERATORS = {
4461         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4462         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4463     }
4464     operator_rex = re.compile(r'''(?x)\s*
4465         (?P<op>%s)\s*(?P<key>[a-z_]+)
4466         \s*$
4467         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4468     m = operator_rex.search(filter_part)
4469     if m:
4470         op = UNARY_OPERATORS[m.group('op')]
4471         actual_value = dct.get(m.group('key'))
4472         return op(actual_value)
4473
4474     raise ValueError('Invalid filter part %r' % filter_part)
4475
4476
4477 def match_str(filter_str, dct):
4478     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4479
4480     return all(
4481         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4482
4483
4484 def match_filter_func(filter_str):
4485     def _match_func(info_dict):
4486         if match_str(filter_str, info_dict):
4487             return None
4488         else:
4489             video_title = info_dict.get('title', info_dict.get('id', 'video'))
4490             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4491     return _match_func
4492
4493
4494 def parse_dfxp_time_expr(time_expr):
4495     if not time_expr:
4496         return
4497
4498     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4499     if mobj:
4500         return float(mobj.group('time_offset'))
4501
4502     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4503     if mobj:
4504         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
4505
4506
4507 def srt_subtitles_timecode(seconds):
4508     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4509
4510
4511 def dfxp2srt(dfxp_data):
4512     '''
4513     @param dfxp_data A bytes-like object containing DFXP data
4514     @returns A unicode object containing converted SRT data
4515     '''
4516     LEGACY_NAMESPACES = (
4517         (b'http://www.w3.org/ns/ttml', [
4518             b'http://www.w3.org/2004/11/ttaf1',
4519             b'http://www.w3.org/2006/04/ttaf1',
4520             b'http://www.w3.org/2006/10/ttaf1',
4521         ]),
4522         (b'http://www.w3.org/ns/ttml#styling', [
4523             b'http://www.w3.org/ns/ttml#style',
4524         ]),
4525     )
4526
4527     SUPPORTED_STYLING = [
4528         'color',
4529         'fontFamily',
4530         'fontSize',
4531         'fontStyle',
4532         'fontWeight',
4533         'textDecoration'
4534     ]
4535
4536     _x = functools.partial(xpath_with_ns, ns_map={
4537         'xml': 'http://www.w3.org/XML/1998/namespace',
4538         'ttml': 'http://www.w3.org/ns/ttml',
4539         'tts': 'http://www.w3.org/ns/ttml#styling',
4540     })
4541
4542     styles = {}
4543     default_style = {}
4544
4545     class TTMLPElementParser(object):
4546         _out = ''
4547         _unclosed_elements = []
4548         _applied_styles = []
4549
4550         def start(self, tag, attrib):
4551             if tag in (_x('ttml:br'), 'br'):
4552                 self._out += '\n'
4553             else:
4554                 unclosed_elements = []
4555                 style = {}
4556                 element_style_id = attrib.get('style')
4557                 if default_style:
4558                     style.update(default_style)
4559                 if element_style_id:
4560                     style.update(styles.get(element_style_id, {}))
4561                 for prop in SUPPORTED_STYLING:
4562                     prop_val = attrib.get(_x('tts:' + prop))
4563                     if prop_val:
4564                         style[prop] = prop_val
4565                 if style:
4566                     font = ''
4567                     for k, v in sorted(style.items()):
4568                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
4569                             continue
4570                         if k == 'color':
4571                             font += ' color="%s"' % v
4572                         elif k == 'fontSize':
4573                             font += ' size="%s"' % v
4574                         elif k == 'fontFamily':
4575                             font += ' face="%s"' % v
4576                         elif k == 'fontWeight' and v == 'bold':
4577                             self._out += '<b>'
4578                             unclosed_elements.append('b')
4579                         elif k == 'fontStyle' and v == 'italic':
4580                             self._out += '<i>'
4581                             unclosed_elements.append('i')
4582                         elif k == 'textDecoration' and v == 'underline':
4583                             self._out += '<u>'
4584                             unclosed_elements.append('u')
4585                     if font:
4586                         self._out += '<font' + font + '>'
4587                         unclosed_elements.append('font')
4588                     applied_style = {}
4589                     if self._applied_styles:
4590                         applied_style.update(self._applied_styles[-1])
4591                     applied_style.update(style)
4592                     self._applied_styles.append(applied_style)
4593                 self._unclosed_elements.append(unclosed_elements)
4594
4595         def end(self, tag):
4596             if tag not in (_x('ttml:br'), 'br'):
4597                 unclosed_elements = self._unclosed_elements.pop()
4598                 for element in reversed(unclosed_elements):
4599                     self._out += '</%s>' % element
4600                 if unclosed_elements and self._applied_styles:
4601                     self._applied_styles.pop()
4602
4603         def data(self, data):
4604             self._out += data
4605
4606         def close(self):
4607             return self._out.strip()
4608
4609     def parse_node(node):
4610         target = TTMLPElementParser()
4611         parser = xml.etree.ElementTree.XMLParser(target=target)
4612         parser.feed(xml.etree.ElementTree.tostring(node))
4613         return parser.close()
4614
4615     for k, v in LEGACY_NAMESPACES:
4616         for ns in v:
4617             dfxp_data = dfxp_data.replace(ns, k)
4618
4619     dfxp = compat_etree_fromstring(dfxp_data)
4620     out = []
4621     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4622
4623     if not paras:
4624         raise ValueError('Invalid dfxp/TTML subtitle')
4625
4626     repeat = False
4627     while True:
4628         for style in dfxp.findall(_x('.//ttml:style')):
4629             style_id = style.get('id') or style.get(_x('xml:id'))
4630             if not style_id:
4631                 continue
4632             parent_style_id = style.get('style')
4633             if parent_style_id:
4634                 if parent_style_id not in styles:
4635                     repeat = True
4636                     continue
4637                 styles[style_id] = styles[parent_style_id].copy()
4638             for prop in SUPPORTED_STYLING:
4639                 prop_val = style.get(_x('tts:' + prop))
4640                 if prop_val:
4641                     styles.setdefault(style_id, {})[prop] = prop_val
4642         if repeat:
4643             repeat = False
4644         else:
4645             break
4646
4647     for p in ('body', 'div'):
4648         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4649         if ele is None:
4650             continue
4651         style = styles.get(ele.get('style'))
4652         if not style:
4653             continue
4654         default_style.update(style)
4655
4656     for para, index in zip(paras, itertools.count(1)):
4657         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4658         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4659         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4660         if begin_time is None:
4661             continue
4662         if not end_time:
4663             if not dur:
4664                 continue
4665             end_time = begin_time + dur
4666         out.append('%d\n%s --> %s\n%s\n\n' % (
4667             index,
4668             srt_subtitles_timecode(begin_time),
4669             srt_subtitles_timecode(end_time),
4670             parse_node(para)))
4671
4672     return ''.join(out)
4673
4674
4675 def cli_option(params, command_option, param):
4676     param = params.get(param)
4677     if param:
4678         param = compat_str(param)
4679     return [command_option, param] if param is not None else []
4680
4681
4682 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4683     param = params.get(param)
4684     if param is None:
4685         return []
4686     assert isinstance(param, bool)
4687     if separator:
4688         return [command_option + separator + (true_value if param else false_value)]
4689     return [command_option, true_value if param else false_value]
4690
4691
4692 def cli_valueless_option(params, command_option, param, expected_value=True):
4693     param = params.get(param)
4694     return [command_option] if param == expected_value else []
4695
4696
4697 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4698     if isinstance(argdict, (list, tuple)):  # for backward compatibility
4699         if use_compat:
4700             return argdict
4701         else:
4702             argdict = None
4703     if argdict is None:
4704         return default
4705     assert isinstance(argdict, dict)
4706
4707     assert isinstance(keys, (list, tuple))
4708     for key_list in keys:
4709         if isinstance(key_list, compat_str):
4710             key_list = (key_list,)
4711         arg_list = list(filter(
4712             lambda x: x is not None,
4713             [argdict.get(key.lower()) for key in key_list]))
4714         if arg_list:
4715             return [arg for args in arg_list for arg in args]
4716     return default
4717
4718
4719 class ISO639Utils(object):
4720     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4721     _lang_map = {
4722         'aa': 'aar',
4723         'ab': 'abk',
4724         'ae': 'ave',
4725         'af': 'afr',
4726         'ak': 'aka',
4727         'am': 'amh',
4728         'an': 'arg',
4729         'ar': 'ara',
4730         'as': 'asm',
4731         'av': 'ava',
4732         'ay': 'aym',
4733         'az': 'aze',
4734         'ba': 'bak',
4735         'be': 'bel',
4736         'bg': 'bul',
4737         'bh': 'bih',
4738         'bi': 'bis',
4739         'bm': 'bam',
4740         'bn': 'ben',
4741         'bo': 'bod',
4742         'br': 'bre',
4743         'bs': 'bos',
4744         'ca': 'cat',
4745         'ce': 'che',
4746         'ch': 'cha',
4747         'co': 'cos',
4748         'cr': 'cre',
4749         'cs': 'ces',
4750         'cu': 'chu',
4751         'cv': 'chv',
4752         'cy': 'cym',
4753         'da': 'dan',
4754         'de': 'deu',
4755         'dv': 'div',
4756         'dz': 'dzo',
4757         'ee': 'ewe',
4758         'el': 'ell',
4759         'en': 'eng',
4760         'eo': 'epo',
4761         'es': 'spa',
4762         'et': 'est',
4763         'eu': 'eus',
4764         'fa': 'fas',
4765         'ff': 'ful',
4766         'fi': 'fin',
4767         'fj': 'fij',
4768         'fo': 'fao',
4769         'fr': 'fra',
4770         'fy': 'fry',
4771         'ga': 'gle',
4772         'gd': 'gla',
4773         'gl': 'glg',
4774         'gn': 'grn',
4775         'gu': 'guj',
4776         'gv': 'glv',
4777         'ha': 'hau',
4778         'he': 'heb',
4779         'iw': 'heb',  # Replaced by he in 1989 revision
4780         'hi': 'hin',
4781         'ho': 'hmo',
4782         'hr': 'hrv',
4783         'ht': 'hat',
4784         'hu': 'hun',
4785         'hy': 'hye',
4786         'hz': 'her',
4787         'ia': 'ina',
4788         'id': 'ind',
4789         'in': 'ind',  # Replaced by id in 1989 revision
4790         'ie': 'ile',
4791         'ig': 'ibo',
4792         'ii': 'iii',
4793         'ik': 'ipk',
4794         'io': 'ido',
4795         'is': 'isl',
4796         'it': 'ita',
4797         'iu': 'iku',
4798         'ja': 'jpn',
4799         'jv': 'jav',
4800         'ka': 'kat',
4801         'kg': 'kon',
4802         'ki': 'kik',
4803         'kj': 'kua',
4804         'kk': 'kaz',
4805         'kl': 'kal',
4806         'km': 'khm',
4807         'kn': 'kan',
4808         'ko': 'kor',
4809         'kr': 'kau',
4810         'ks': 'kas',
4811         'ku': 'kur',
4812         'kv': 'kom',
4813         'kw': 'cor',
4814         'ky': 'kir',
4815         'la': 'lat',
4816         'lb': 'ltz',
4817         'lg': 'lug',
4818         'li': 'lim',
4819         'ln': 'lin',
4820         'lo': 'lao',
4821         'lt': 'lit',
4822         'lu': 'lub',
4823         'lv': 'lav',
4824         'mg': 'mlg',
4825         'mh': 'mah',
4826         'mi': 'mri',
4827         'mk': 'mkd',
4828         'ml': 'mal',
4829         'mn': 'mon',
4830         'mr': 'mar',
4831         'ms': 'msa',
4832         'mt': 'mlt',
4833         'my': 'mya',
4834         'na': 'nau',
4835         'nb': 'nob',
4836         'nd': 'nde',
4837         'ne': 'nep',
4838         'ng': 'ndo',
4839         'nl': 'nld',
4840         'nn': 'nno',
4841         'no': 'nor',
4842         'nr': 'nbl',
4843         'nv': 'nav',
4844         'ny': 'nya',
4845         'oc': 'oci',
4846         'oj': 'oji',
4847         'om': 'orm',
4848         'or': 'ori',
4849         'os': 'oss',
4850         'pa': 'pan',
4851         'pi': 'pli',
4852         'pl': 'pol',
4853         'ps': 'pus',
4854         'pt': 'por',
4855         'qu': 'que',
4856         'rm': 'roh',
4857         'rn': 'run',
4858         'ro': 'ron',
4859         'ru': 'rus',
4860         'rw': 'kin',
4861         'sa': 'san',
4862         'sc': 'srd',
4863         'sd': 'snd',
4864         'se': 'sme',
4865         'sg': 'sag',
4866         'si': 'sin',
4867         'sk': 'slk',
4868         'sl': 'slv',
4869         'sm': 'smo',
4870         'sn': 'sna',
4871         'so': 'som',
4872         'sq': 'sqi',
4873         'sr': 'srp',
4874         'ss': 'ssw',
4875         'st': 'sot',
4876         'su': 'sun',
4877         'sv': 'swe',
4878         'sw': 'swa',
4879         'ta': 'tam',
4880         'te': 'tel',
4881         'tg': 'tgk',
4882         'th': 'tha',
4883         'ti': 'tir',
4884         'tk': 'tuk',
4885         'tl': 'tgl',
4886         'tn': 'tsn',
4887         'to': 'ton',
4888         'tr': 'tur',
4889         'ts': 'tso',
4890         'tt': 'tat',
4891         'tw': 'twi',
4892         'ty': 'tah',
4893         'ug': 'uig',
4894         'uk': 'ukr',
4895         'ur': 'urd',
4896         'uz': 'uzb',
4897         've': 'ven',
4898         'vi': 'vie',
4899         'vo': 'vol',
4900         'wa': 'wln',
4901         'wo': 'wol',
4902         'xh': 'xho',
4903         'yi': 'yid',
4904         'ji': 'yid',  # Replaced by yi in 1989 revision
4905         'yo': 'yor',
4906         'za': 'zha',
4907         'zh': 'zho',
4908         'zu': 'zul',
4909     }
4910
4911     @classmethod
4912     def short2long(cls, code):
4913         """Convert language code from ISO 639-1 to ISO 639-2/T"""
4914         return cls._lang_map.get(code[:2])
4915
4916     @classmethod
4917     def long2short(cls, code):
4918         """Convert language code from ISO 639-2/T to ISO 639-1"""
4919         for short_name, long_name in cls._lang_map.items():
4920             if long_name == code:
4921                 return short_name
4922
4923
4924 class ISO3166Utils(object):
4925     # From http://data.okfn.org/data/core/country-list
4926     _country_map = {
4927         'AF': 'Afghanistan',
4928         'AX': 'Åland Islands',
4929         'AL': 'Albania',
4930         'DZ': 'Algeria',
4931         'AS': 'American Samoa',
4932         'AD': 'Andorra',
4933         'AO': 'Angola',
4934         'AI': 'Anguilla',
4935         'AQ': 'Antarctica',
4936         'AG': 'Antigua and Barbuda',
4937         'AR': 'Argentina',
4938         'AM': 'Armenia',
4939         'AW': 'Aruba',
4940         'AU': 'Australia',
4941         'AT': 'Austria',
4942         'AZ': 'Azerbaijan',
4943         'BS': 'Bahamas',
4944         'BH': 'Bahrain',
4945         'BD': 'Bangladesh',
4946         'BB': 'Barbados',
4947         'BY': 'Belarus',
4948         'BE': 'Belgium',
4949         'BZ': 'Belize',
4950         'BJ': 'Benin',
4951         'BM': 'Bermuda',
4952         'BT': 'Bhutan',
4953         'BO': 'Bolivia, Plurinational State of',
4954         'BQ': 'Bonaire, Sint Eustatius and Saba',
4955         'BA': 'Bosnia and Herzegovina',
4956         'BW': 'Botswana',
4957         'BV': 'Bouvet Island',
4958         'BR': 'Brazil',
4959         'IO': 'British Indian Ocean Territory',
4960         'BN': 'Brunei Darussalam',
4961         'BG': 'Bulgaria',
4962         'BF': 'Burkina Faso',
4963         'BI': 'Burundi',
4964         'KH': 'Cambodia',
4965         'CM': 'Cameroon',
4966         'CA': 'Canada',
4967         'CV': 'Cape Verde',
4968         'KY': 'Cayman Islands',
4969         'CF': 'Central African Republic',
4970         'TD': 'Chad',
4971         'CL': 'Chile',
4972         'CN': 'China',
4973         'CX': 'Christmas Island',
4974         'CC': 'Cocos (Keeling) Islands',
4975         'CO': 'Colombia',
4976         'KM': 'Comoros',
4977         'CG': 'Congo',
4978         'CD': 'Congo, the Democratic Republic of the',
4979         'CK': 'Cook Islands',
4980         'CR': 'Costa Rica',
4981         'CI': 'Côte d\'Ivoire',
4982         'HR': 'Croatia',
4983         'CU': 'Cuba',
4984         'CW': 'Curaçao',
4985         'CY': 'Cyprus',
4986         'CZ': 'Czech Republic',
4987         'DK': 'Denmark',
4988         'DJ': 'Djibouti',
4989         'DM': 'Dominica',
4990         'DO': 'Dominican Republic',
4991         'EC': 'Ecuador',
4992         'EG': 'Egypt',
4993         'SV': 'El Salvador',
4994         'GQ': 'Equatorial Guinea',
4995         'ER': 'Eritrea',
4996         'EE': 'Estonia',
4997         'ET': 'Ethiopia',
4998         'FK': 'Falkland Islands (Malvinas)',
4999         'FO': 'Faroe Islands',
5000         'FJ': 'Fiji',
5001         'FI': 'Finland',
5002         'FR': 'France',
5003         'GF': 'French Guiana',
5004         'PF': 'French Polynesia',
5005         'TF': 'French Southern Territories',
5006         'GA': 'Gabon',
5007         'GM': 'Gambia',
5008         'GE': 'Georgia',
5009         'DE': 'Germany',
5010         'GH': 'Ghana',
5011         'GI': 'Gibraltar',
5012         'GR': 'Greece',
5013         'GL': 'Greenland',
5014         'GD': 'Grenada',
5015         'GP': 'Guadeloupe',
5016         'GU': 'Guam',
5017         'GT': 'Guatemala',
5018         'GG': 'Guernsey',
5019         'GN': 'Guinea',
5020         'GW': 'Guinea-Bissau',
5021         'GY': 'Guyana',
5022         'HT': 'Haiti',
5023         'HM': 'Heard Island and McDonald Islands',
5024         'VA': 'Holy See (Vatican City State)',
5025         'HN': 'Honduras',
5026         'HK': 'Hong Kong',
5027         'HU': 'Hungary',
5028         'IS': 'Iceland',
5029         'IN': 'India',
5030         'ID': 'Indonesia',
5031         'IR': 'Iran, Islamic Republic of',
5032         'IQ': 'Iraq',
5033         'IE': 'Ireland',
5034         'IM': 'Isle of Man',
5035         'IL': 'Israel',
5036         'IT': 'Italy',
5037         'JM': 'Jamaica',
5038         'JP': 'Japan',
5039         'JE': 'Jersey',
5040         'JO': 'Jordan',
5041         'KZ': 'Kazakhstan',
5042         'KE': 'Kenya',
5043         'KI': 'Kiribati',
5044         'KP': 'Korea, Democratic People\'s Republic of',
5045         'KR': 'Korea, Republic of',
5046         'KW': 'Kuwait',
5047         'KG': 'Kyrgyzstan',
5048         'LA': 'Lao People\'s Democratic Republic',
5049         'LV': 'Latvia',
5050         'LB': 'Lebanon',
5051         'LS': 'Lesotho',
5052         'LR': 'Liberia',
5053         'LY': 'Libya',
5054         'LI': 'Liechtenstein',
5055         'LT': 'Lithuania',
5056         'LU': 'Luxembourg',
5057         'MO': 'Macao',
5058         'MK': 'Macedonia, the Former Yugoslav Republic of',
5059         'MG': 'Madagascar',
5060         'MW': 'Malawi',
5061         'MY': 'Malaysia',
5062         'MV': 'Maldives',
5063         'ML': 'Mali',
5064         'MT': 'Malta',
5065         'MH': 'Marshall Islands',
5066         'MQ': 'Martinique',
5067         'MR': 'Mauritania',
5068         'MU': 'Mauritius',
5069         'YT': 'Mayotte',
5070         'MX': 'Mexico',
5071         'FM': 'Micronesia, Federated States of',
5072         'MD': 'Moldova, Republic of',
5073         'MC': 'Monaco',
5074         'MN': 'Mongolia',
5075         'ME': 'Montenegro',
5076         'MS': 'Montserrat',
5077         'MA': 'Morocco',
5078         'MZ': 'Mozambique',
5079         'MM': 'Myanmar',
5080         'NA': 'Namibia',
5081         'NR': 'Nauru',
5082         'NP': 'Nepal',
5083         'NL': 'Netherlands',
5084         'NC': 'New Caledonia',
5085         'NZ': 'New Zealand',
5086         'NI': 'Nicaragua',
5087         'NE': 'Niger',
5088         'NG': 'Nigeria',
5089         'NU': 'Niue',
5090         'NF': 'Norfolk Island',
5091         'MP': 'Northern Mariana Islands',
5092         'NO': 'Norway',
5093         'OM': 'Oman',
5094         'PK': 'Pakistan',
5095         'PW': 'Palau',
5096         'PS': 'Palestine, State of',
5097         'PA': 'Panama',
5098         'PG': 'Papua New Guinea',
5099         'PY': 'Paraguay',
5100         'PE': 'Peru',
5101         'PH': 'Philippines',
5102         'PN': 'Pitcairn',
5103         'PL': 'Poland',
5104         'PT': 'Portugal',
5105         'PR': 'Puerto Rico',
5106         'QA': 'Qatar',
5107         'RE': 'Réunion',
5108         'RO': 'Romania',
5109         'RU': 'Russian Federation',
5110         'RW': 'Rwanda',
5111         'BL': 'Saint Barthélemy',
5112         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5113         'KN': 'Saint Kitts and Nevis',
5114         'LC': 'Saint Lucia',
5115         'MF': 'Saint Martin (French part)',
5116         'PM': 'Saint Pierre and Miquelon',
5117         'VC': 'Saint Vincent and the Grenadines',
5118         'WS': 'Samoa',
5119         'SM': 'San Marino',
5120         'ST': 'Sao Tome and Principe',
5121         'SA': 'Saudi Arabia',
5122         'SN': 'Senegal',
5123         'RS': 'Serbia',
5124         'SC': 'Seychelles',
5125         'SL': 'Sierra Leone',
5126         'SG': 'Singapore',
5127         'SX': 'Sint Maarten (Dutch part)',
5128         'SK': 'Slovakia',
5129         'SI': 'Slovenia',
5130         'SB': 'Solomon Islands',
5131         'SO': 'Somalia',
5132         'ZA': 'South Africa',
5133         'GS': 'South Georgia and the South Sandwich Islands',
5134         'SS': 'South Sudan',
5135         'ES': 'Spain',
5136         'LK': 'Sri Lanka',
5137         'SD': 'Sudan',
5138         'SR': 'Suriname',
5139         'SJ': 'Svalbard and Jan Mayen',
5140         'SZ': 'Swaziland',
5141         'SE': 'Sweden',
5142         'CH': 'Switzerland',
5143         'SY': 'Syrian Arab Republic',
5144         'TW': 'Taiwan, Province of China',
5145         'TJ': 'Tajikistan',
5146         'TZ': 'Tanzania, United Republic of',
5147         'TH': 'Thailand',
5148         'TL': 'Timor-Leste',
5149         'TG': 'Togo',
5150         'TK': 'Tokelau',
5151         'TO': 'Tonga',
5152         'TT': 'Trinidad and Tobago',
5153         'TN': 'Tunisia',
5154         'TR': 'Turkey',
5155         'TM': 'Turkmenistan',
5156         'TC': 'Turks and Caicos Islands',
5157         'TV': 'Tuvalu',
5158         'UG': 'Uganda',
5159         'UA': 'Ukraine',
5160         'AE': 'United Arab Emirates',
5161         'GB': 'United Kingdom',
5162         'US': 'United States',
5163         'UM': 'United States Minor Outlying Islands',
5164         'UY': 'Uruguay',
5165         'UZ': 'Uzbekistan',
5166         'VU': 'Vanuatu',
5167         'VE': 'Venezuela, Bolivarian Republic of',
5168         'VN': 'Viet Nam',
5169         'VG': 'Virgin Islands, British',
5170         'VI': 'Virgin Islands, U.S.',
5171         'WF': 'Wallis and Futuna',
5172         'EH': 'Western Sahara',
5173         'YE': 'Yemen',
5174         'ZM': 'Zambia',
5175         'ZW': 'Zimbabwe',
5176     }
5177
5178     @classmethod
5179     def short2full(cls, code):
5180         """Convert an ISO 3166-2 country code to the corresponding full name"""
5181         return cls._country_map.get(code.upper())
5182
5183
5184 class GeoUtils(object):
5185     # Major IPv4 address blocks per country
5186     _country_ip_map = {
5187         'AD': '46.172.224.0/19',
5188         'AE': '94.200.0.0/13',
5189         'AF': '149.54.0.0/17',
5190         'AG': '209.59.64.0/18',
5191         'AI': '204.14.248.0/21',
5192         'AL': '46.99.0.0/16',
5193         'AM': '46.70.0.0/15',
5194         'AO': '105.168.0.0/13',
5195         'AP': '182.50.184.0/21',
5196         'AQ': '23.154.160.0/24',
5197         'AR': '181.0.0.0/12',
5198         'AS': '202.70.112.0/20',
5199         'AT': '77.116.0.0/14',
5200         'AU': '1.128.0.0/11',
5201         'AW': '181.41.0.0/18',
5202         'AX': '185.217.4.0/22',
5203         'AZ': '5.197.0.0/16',
5204         'BA': '31.176.128.0/17',
5205         'BB': '65.48.128.0/17',
5206         'BD': '114.130.0.0/16',
5207         'BE': '57.0.0.0/8',
5208         'BF': '102.178.0.0/15',
5209         'BG': '95.42.0.0/15',
5210         'BH': '37.131.0.0/17',
5211         'BI': '154.117.192.0/18',
5212         'BJ': '137.255.0.0/16',
5213         'BL': '185.212.72.0/23',
5214         'BM': '196.12.64.0/18',
5215         'BN': '156.31.0.0/16',
5216         'BO': '161.56.0.0/16',
5217         'BQ': '161.0.80.0/20',
5218         'BR': '191.128.0.0/12',
5219         'BS': '24.51.64.0/18',
5220         'BT': '119.2.96.0/19',
5221         'BW': '168.167.0.0/16',
5222         'BY': '178.120.0.0/13',
5223         'BZ': '179.42.192.0/18',
5224         'CA': '99.224.0.0/11',
5225         'CD': '41.243.0.0/16',
5226         'CF': '197.242.176.0/21',
5227         'CG': '160.113.0.0/16',
5228         'CH': '85.0.0.0/13',
5229         'CI': '102.136.0.0/14',
5230         'CK': '202.65.32.0/19',
5231         'CL': '152.172.0.0/14',
5232         'CM': '102.244.0.0/14',
5233         'CN': '36.128.0.0/10',
5234         'CO': '181.240.0.0/12',
5235         'CR': '201.192.0.0/12',
5236         'CU': '152.206.0.0/15',
5237         'CV': '165.90.96.0/19',
5238         'CW': '190.88.128.0/17',
5239         'CY': '31.153.0.0/16',
5240         'CZ': '88.100.0.0/14',
5241         'DE': '53.0.0.0/8',
5242         'DJ': '197.241.0.0/17',
5243         'DK': '87.48.0.0/12',
5244         'DM': '192.243.48.0/20',
5245         'DO': '152.166.0.0/15',
5246         'DZ': '41.96.0.0/12',
5247         'EC': '186.68.0.0/15',
5248         'EE': '90.190.0.0/15',
5249         'EG': '156.160.0.0/11',
5250         'ER': '196.200.96.0/20',
5251         'ES': '88.0.0.0/11',
5252         'ET': '196.188.0.0/14',
5253         'EU': '2.16.0.0/13',
5254         'FI': '91.152.0.0/13',
5255         'FJ': '144.120.0.0/16',
5256         'FK': '80.73.208.0/21',
5257         'FM': '119.252.112.0/20',
5258         'FO': '88.85.32.0/19',
5259         'FR': '90.0.0.0/9',
5260         'GA': '41.158.0.0/15',
5261         'GB': '25.0.0.0/8',
5262         'GD': '74.122.88.0/21',
5263         'GE': '31.146.0.0/16',
5264         'GF': '161.22.64.0/18',
5265         'GG': '62.68.160.0/19',
5266         'GH': '154.160.0.0/12',
5267         'GI': '95.164.0.0/16',
5268         'GL': '88.83.0.0/19',
5269         'GM': '160.182.0.0/15',
5270         'GN': '197.149.192.0/18',
5271         'GP': '104.250.0.0/19',
5272         'GQ': '105.235.224.0/20',
5273         'GR': '94.64.0.0/13',
5274         'GT': '168.234.0.0/16',
5275         'GU': '168.123.0.0/16',
5276         'GW': '197.214.80.0/20',
5277         'GY': '181.41.64.0/18',
5278         'HK': '113.252.0.0/14',
5279         'HN': '181.210.0.0/16',
5280         'HR': '93.136.0.0/13',
5281         'HT': '148.102.128.0/17',
5282         'HU': '84.0.0.0/14',
5283         'ID': '39.192.0.0/10',
5284         'IE': '87.32.0.0/12',
5285         'IL': '79.176.0.0/13',
5286         'IM': '5.62.80.0/20',
5287         'IN': '117.192.0.0/10',
5288         'IO': '203.83.48.0/21',
5289         'IQ': '37.236.0.0/14',
5290         'IR': '2.176.0.0/12',
5291         'IS': '82.221.0.0/16',
5292         'IT': '79.0.0.0/10',
5293         'JE': '87.244.64.0/18',
5294         'JM': '72.27.0.0/17',
5295         'JO': '176.29.0.0/16',
5296         'JP': '133.0.0.0/8',
5297         'KE': '105.48.0.0/12',
5298         'KG': '158.181.128.0/17',
5299         'KH': '36.37.128.0/17',
5300         'KI': '103.25.140.0/22',
5301         'KM': '197.255.224.0/20',
5302         'KN': '198.167.192.0/19',
5303         'KP': '175.45.176.0/22',
5304         'KR': '175.192.0.0/10',
5305         'KW': '37.36.0.0/14',
5306         'KY': '64.96.0.0/15',
5307         'KZ': '2.72.0.0/13',
5308         'LA': '115.84.64.0/18',
5309         'LB': '178.135.0.0/16',
5310         'LC': '24.92.144.0/20',
5311         'LI': '82.117.0.0/19',
5312         'LK': '112.134.0.0/15',
5313         'LR': '102.183.0.0/16',
5314         'LS': '129.232.0.0/17',
5315         'LT': '78.56.0.0/13',
5316         'LU': '188.42.0.0/16',
5317         'LV': '46.109.0.0/16',
5318         'LY': '41.252.0.0/14',
5319         'MA': '105.128.0.0/11',
5320         'MC': '88.209.64.0/18',
5321         'MD': '37.246.0.0/16',
5322         'ME': '178.175.0.0/17',
5323         'MF': '74.112.232.0/21',
5324         'MG': '154.126.0.0/17',
5325         'MH': '117.103.88.0/21',
5326         'MK': '77.28.0.0/15',
5327         'ML': '154.118.128.0/18',
5328         'MM': '37.111.0.0/17',
5329         'MN': '49.0.128.0/17',
5330         'MO': '60.246.0.0/16',
5331         'MP': '202.88.64.0/20',
5332         'MQ': '109.203.224.0/19',
5333         'MR': '41.188.64.0/18',
5334         'MS': '208.90.112.0/22',
5335         'MT': '46.11.0.0/16',
5336         'MU': '105.16.0.0/12',
5337         'MV': '27.114.128.0/18',
5338         'MW': '102.70.0.0/15',
5339         'MX': '187.192.0.0/11',
5340         'MY': '175.136.0.0/13',
5341         'MZ': '197.218.0.0/15',
5342         'NA': '41.182.0.0/16',
5343         'NC': '101.101.0.0/18',
5344         'NE': '197.214.0.0/18',
5345         'NF': '203.17.240.0/22',
5346         'NG': '105.112.0.0/12',
5347         'NI': '186.76.0.0/15',
5348         'NL': '145.96.0.0/11',
5349         'NO': '84.208.0.0/13',
5350         'NP': '36.252.0.0/15',
5351         'NR': '203.98.224.0/19',
5352         'NU': '49.156.48.0/22',
5353         'NZ': '49.224.0.0/14',
5354         'OM': '5.36.0.0/15',
5355         'PA': '186.72.0.0/15',
5356         'PE': '186.160.0.0/14',
5357         'PF': '123.50.64.0/18',
5358         'PG': '124.240.192.0/19',
5359         'PH': '49.144.0.0/13',
5360         'PK': '39.32.0.0/11',
5361         'PL': '83.0.0.0/11',
5362         'PM': '70.36.0.0/20',
5363         'PR': '66.50.0.0/16',
5364         'PS': '188.161.0.0/16',
5365         'PT': '85.240.0.0/13',
5366         'PW': '202.124.224.0/20',
5367         'PY': '181.120.0.0/14',
5368         'QA': '37.210.0.0/15',
5369         'RE': '102.35.0.0/16',
5370         'RO': '79.112.0.0/13',
5371         'RS': '93.86.0.0/15',
5372         'RU': '5.136.0.0/13',
5373         'RW': '41.186.0.0/16',
5374         'SA': '188.48.0.0/13',
5375         'SB': '202.1.160.0/19',
5376         'SC': '154.192.0.0/11',
5377         'SD': '102.120.0.0/13',
5378         'SE': '78.64.0.0/12',
5379         'SG': '8.128.0.0/10',
5380         'SI': '188.196.0.0/14',
5381         'SK': '78.98.0.0/15',
5382         'SL': '102.143.0.0/17',
5383         'SM': '89.186.32.0/19',
5384         'SN': '41.82.0.0/15',
5385         'SO': '154.115.192.0/18',
5386         'SR': '186.179.128.0/17',
5387         'SS': '105.235.208.0/21',
5388         'ST': '197.159.160.0/19',
5389         'SV': '168.243.0.0/16',
5390         'SX': '190.102.0.0/20',
5391         'SY': '5.0.0.0/16',
5392         'SZ': '41.84.224.0/19',
5393         'TC': '65.255.48.0/20',
5394         'TD': '154.68.128.0/19',
5395         'TG': '196.168.0.0/14',
5396         'TH': '171.96.0.0/13',
5397         'TJ': '85.9.128.0/18',
5398         'TK': '27.96.24.0/21',
5399         'TL': '180.189.160.0/20',
5400         'TM': '95.85.96.0/19',
5401         'TN': '197.0.0.0/11',
5402         'TO': '175.176.144.0/21',
5403         'TR': '78.160.0.0/11',
5404         'TT': '186.44.0.0/15',
5405         'TV': '202.2.96.0/19',
5406         'TW': '120.96.0.0/11',
5407         'TZ': '156.156.0.0/14',
5408         'UA': '37.52.0.0/14',
5409         'UG': '102.80.0.0/13',
5410         'US': '6.0.0.0/8',
5411         'UY': '167.56.0.0/13',
5412         'UZ': '84.54.64.0/18',
5413         'VA': '212.77.0.0/19',
5414         'VC': '207.191.240.0/21',
5415         'VE': '186.88.0.0/13',
5416         'VG': '66.81.192.0/20',
5417         'VI': '146.226.0.0/16',
5418         'VN': '14.160.0.0/11',
5419         'VU': '202.80.32.0/20',
5420         'WF': '117.20.32.0/21',
5421         'WS': '202.4.32.0/19',
5422         'YE': '134.35.0.0/16',
5423         'YT': '41.242.116.0/22',
5424         'ZA': '41.0.0.0/11',
5425         'ZM': '102.144.0.0/13',
5426         'ZW': '102.177.192.0/18',
5427     }
5428
5429     @classmethod
5430     def random_ipv4(cls, code_or_block):
5431         if len(code_or_block) == 2:
5432             block = cls._country_ip_map.get(code_or_block.upper())
5433             if not block:
5434                 return None
5435         else:
5436             block = code_or_block
5437         addr, preflen = block.split('/')
5438         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5439         addr_max = addr_min | (0xffffffff >> int(preflen))
5440         return compat_str(socket.inet_ntoa(
5441             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5442
5443
5444 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5445     def __init__(self, proxies=None):
5446         # Set default handlers
5447         for type in ('http', 'https'):
5448             setattr(self, '%s_open' % type,
5449                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5450                         meth(r, proxy, type))
5451         compat_urllib_request.ProxyHandler.__init__(self, proxies)
5452
5453     def proxy_open(self, req, proxy, type):
5454         req_proxy = req.headers.get('Ytdl-request-proxy')
5455         if req_proxy is not None:
5456             proxy = req_proxy
5457             del req.headers['Ytdl-request-proxy']
5458
5459         if proxy == '__noproxy__':
5460             return None  # No Proxy
5461         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5462             req.add_header('Ytdl-socks-proxy', proxy)
5463             # yt-dlp's http/https handlers do wrapping the socket with socks
5464             return None
5465         return compat_urllib_request.ProxyHandler.proxy_open(
5466             self, req, proxy, type)
5467
5468
5469 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5470 # released into Public Domain
5471 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5472
5473 def long_to_bytes(n, blocksize=0):
5474     """long_to_bytes(n:long, blocksize:int) : string
5475     Convert a long integer to a byte string.
5476
5477     If optional blocksize is given and greater than zero, pad the front of the
5478     byte string with binary zeros so that the length is a multiple of
5479     blocksize.
5480     """
5481     # after much testing, this algorithm was deemed to be the fastest
5482     s = b''
5483     n = int(n)
5484     while n > 0:
5485         s = compat_struct_pack('>I', n & 0xffffffff) + s
5486         n = n >> 32
5487     # strip off leading zeros
5488     for i in range(len(s)):
5489         if s[i] != b'\000'[0]:
5490             break
5491     else:
5492         # only happens when n == 0
5493         s = b'\000'
5494         i = 0
5495     s = s[i:]
5496     # add back some pad bytes.  this could be done more efficiently w.r.t. the
5497     # de-padding being done above, but sigh...
5498     if blocksize > 0 and len(s) % blocksize:
5499         s = (blocksize - len(s) % blocksize) * b'\000' + s
5500     return s
5501
5502
5503 def bytes_to_long(s):
5504     """bytes_to_long(string) : long
5505     Convert a byte string to a long integer.
5506
5507     This is (essentially) the inverse of long_to_bytes().
5508     """
5509     acc = 0
5510     length = len(s)
5511     if length % 4:
5512         extra = (4 - length % 4)
5513         s = b'\000' * extra + s
5514         length = length + extra
5515     for i in range(0, length, 4):
5516         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5517     return acc
5518
5519
5520 def ohdave_rsa_encrypt(data, exponent, modulus):
5521     '''
5522     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5523
5524     Input:
5525         data: data to encrypt, bytes-like object
5526         exponent, modulus: parameter e and N of RSA algorithm, both integer
5527     Output: hex string of encrypted data
5528
5529     Limitation: supports one block encryption only
5530     '''
5531
5532     payload = int(binascii.hexlify(data[::-1]), 16)
5533     encrypted = pow(payload, exponent, modulus)
5534     return '%x' % encrypted
5535
5536
5537 def pkcs1pad(data, length):
5538     """
5539     Padding input data with PKCS#1 scheme
5540
5541     @param {int[]} data        input data
5542     @param {int}   length      target length
5543     @returns {int[]}           padded data
5544     """
5545     if len(data) > length - 11:
5546         raise ValueError('Input data too long for PKCS#1 padding')
5547
5548     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5549     return [0, 2] + pseudo_random + [0] + data
5550
5551
5552 def encode_base_n(num, n, table=None):
5553     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5554     if not table:
5555         table = FULL_TABLE[:n]
5556
5557     if n > len(table):
5558         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5559
5560     if num == 0:
5561         return table[0]
5562
5563     ret = ''
5564     while num:
5565         ret = table[num % n] + ret
5566         num = num // n
5567     return ret
5568
5569
5570 def decode_packed_codes(code):
5571     mobj = re.search(PACKED_CODES_RE, code)
5572     obfuscated_code, base, count, symbols = mobj.groups()
5573     base = int(base)
5574     count = int(count)
5575     symbols = symbols.split('|')
5576     symbol_table = {}
5577
5578     while count:
5579         count -= 1
5580         base_n_count = encode_base_n(count, base)
5581         symbol_table[base_n_count] = symbols[count] or base_n_count
5582
5583     return re.sub(
5584         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5585         obfuscated_code)
5586
5587
5588 def caesar(s, alphabet, shift):
5589     if shift == 0:
5590         return s
5591     l = len(alphabet)
5592     return ''.join(
5593         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5594         for c in s)
5595
5596
5597 def rot47(s):
5598     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5599
5600
5601 def parse_m3u8_attributes(attrib):
5602     info = {}
5603     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5604         if val.startswith('"'):
5605             val = val[1:-1]
5606         info[key] = val
5607     return info
5608
5609
5610 def urshift(val, n):
5611     return val >> n if val >= 0 else (val + 0x100000000) >> n
5612
5613
5614 # Based on png2str() written by @gdkchan and improved by @yokrysty
5615 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5616 def decode_png(png_data):
5617     # Reference: https://www.w3.org/TR/PNG/
5618     header = png_data[8:]
5619
5620     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5621         raise IOError('Not a valid PNG file.')
5622
5623     int_map = {1: '>B', 2: '>H', 4: '>I'}
5624     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5625
5626     chunks = []
5627
5628     while header:
5629         length = unpack_integer(header[:4])
5630         header = header[4:]
5631
5632         chunk_type = header[:4]
5633         header = header[4:]
5634
5635         chunk_data = header[:length]
5636         header = header[length:]
5637
5638         header = header[4:]  # Skip CRC
5639
5640         chunks.append({
5641             'type': chunk_type,
5642             'length': length,
5643             'data': chunk_data
5644         })
5645
5646     ihdr = chunks[0]['data']
5647
5648     width = unpack_integer(ihdr[:4])
5649     height = unpack_integer(ihdr[4:8])
5650
5651     idat = b''
5652
5653     for chunk in chunks:
5654         if chunk['type'] == b'IDAT':
5655             idat += chunk['data']
5656
5657     if not idat:
5658         raise IOError('Unable to read PNG data.')
5659
5660     decompressed_data = bytearray(zlib.decompress(idat))
5661
5662     stride = width * 3
5663     pixels = []
5664
5665     def _get_pixel(idx):
5666         x = idx % stride
5667         y = idx // stride
5668         return pixels[y][x]
5669
5670     for y in range(height):
5671         basePos = y * (1 + stride)
5672         filter_type = decompressed_data[basePos]
5673
5674         current_row = []
5675
5676         pixels.append(current_row)
5677
5678         for x in range(stride):
5679             color = decompressed_data[1 + basePos + x]
5680             basex = y * stride + x
5681             left = 0
5682             up = 0
5683
5684             if x > 2:
5685                 left = _get_pixel(basex - 3)
5686             if y > 0:
5687                 up = _get_pixel(basex - stride)
5688
5689             if filter_type == 1:  # Sub
5690                 color = (color + left) & 0xff
5691             elif filter_type == 2:  # Up
5692                 color = (color + up) & 0xff
5693             elif filter_type == 3:  # Average
5694                 color = (color + ((left + up) >> 1)) & 0xff
5695             elif filter_type == 4:  # Paeth
5696                 a = left
5697                 b = up
5698                 c = 0
5699
5700                 if x > 2 and y > 0:
5701                     c = _get_pixel(basex - stride - 3)
5702
5703                 p = a + b - c
5704
5705                 pa = abs(p - a)
5706                 pb = abs(p - b)
5707                 pc = abs(p - c)
5708
5709                 if pa <= pb and pa <= pc:
5710                     color = (color + a) & 0xff
5711                 elif pb <= pc:
5712                     color = (color + b) & 0xff
5713                 else:
5714                     color = (color + c) & 0xff
5715
5716             current_row.append(color)
5717
5718     return width, height, pixels
5719
5720
5721 def write_xattr(path, key, value):
5722     # This mess below finds the best xattr tool for the job
5723     try:
5724         # try the pyxattr module...
5725         import xattr
5726
5727         if hasattr(xattr, 'set'):  # pyxattr
5728             # Unicode arguments are not supported in python-pyxattr until
5729             # version 0.5.0
5730             # See https://github.com/ytdl-org/youtube-dl/issues/5498
5731             pyxattr_required_version = '0.5.0'
5732             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5733                 # TODO: fallback to CLI tools
5734                 raise XAttrUnavailableError(
5735                     'python-pyxattr is detected but is too old. '
5736                     'yt-dlp requires %s or above while your version is %s. '
5737                     'Falling back to other xattr implementations' % (
5738                         pyxattr_required_version, xattr.__version__))
5739
5740             setxattr = xattr.set
5741         else:  # xattr
5742             setxattr = xattr.setxattr
5743
5744         try:
5745             setxattr(path, key, value)
5746         except EnvironmentError as e:
5747             raise XAttrMetadataError(e.errno, e.strerror)
5748
5749     except ImportError:
5750         if compat_os_name == 'nt':
5751             # Write xattrs to NTFS Alternate Data Streams:
5752             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5753             assert ':' not in key
5754             assert os.path.exists(path)
5755
5756             ads_fn = path + ':' + key
5757             try:
5758                 with open(ads_fn, 'wb') as f:
5759                     f.write(value)
5760             except EnvironmentError as e:
5761                 raise XAttrMetadataError(e.errno, e.strerror)
5762         else:
5763             user_has_setfattr = check_executable('setfattr', ['--version'])
5764             user_has_xattr = check_executable('xattr', ['-h'])
5765
5766             if user_has_setfattr or user_has_xattr:
5767
5768                 value = value.decode('utf-8')
5769                 if user_has_setfattr:
5770                     executable = 'setfattr'
5771                     opts = ['-n', key, '-v', value]
5772                 elif user_has_xattr:
5773                     executable = 'xattr'
5774                     opts = ['-w', key, value]
5775
5776                 cmd = ([encodeFilename(executable, True)]
5777                        + [encodeArgument(o) for o in opts]
5778                        + [encodeFilename(path, True)])
5779
5780                 try:
5781                     p = subprocess.Popen(
5782                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5783                 except EnvironmentError as e:
5784                     raise XAttrMetadataError(e.errno, e.strerror)
5785                 stdout, stderr = process_communicate_or_kill(p)
5786                 stderr = stderr.decode('utf-8', 'replace')
5787                 if p.returncode != 0:
5788                     raise XAttrMetadataError(p.returncode, stderr)
5789
5790             else:
5791                 # On Unix, and can't find pyxattr, setfattr, or xattr.
5792                 if sys.platform.startswith('linux'):
5793                     raise XAttrUnavailableError(
5794                         "Couldn't find a tool to set the xattrs. "
5795                         "Install either the python 'pyxattr' or 'xattr' "
5796                         "modules, or the GNU 'attr' package "
5797                         "(which contains the 'setfattr' tool).")
5798                 else:
5799                     raise XAttrUnavailableError(
5800                         "Couldn't find a tool to set the xattrs. "
5801                         "Install either the python 'xattr' module, "
5802                         "or the 'xattr' binary.")
5803
5804
5805 def random_birthday(year_field, month_field, day_field):
5806     start_date = datetime.date(1950, 1, 1)
5807     end_date = datetime.date(1995, 12, 31)
5808     offset = random.randint(0, (end_date - start_date).days)
5809     random_date = start_date + datetime.timedelta(offset)
5810     return {
5811         year_field: str(random_date.year),
5812         month_field: str(random_date.month),
5813         day_field: str(random_date.day),
5814     }
5815
5816
5817 # Templates for internet shortcut files, which are plain text files.
5818 DOT_URL_LINK_TEMPLATE = '''
5819 [InternetShortcut]
5820 URL=%(url)s
5821 '''.lstrip()
5822
5823 DOT_WEBLOC_LINK_TEMPLATE = '''
5824 <?xml version="1.0" encoding="UTF-8"?>
5825 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5826 <plist version="1.0">
5827 <dict>
5828 \t<key>URL</key>
5829 \t<string>%(url)s</string>
5830 </dict>
5831 </plist>
5832 '''.lstrip()
5833
5834 DOT_DESKTOP_LINK_TEMPLATE = '''
5835 [Desktop Entry]
5836 Encoding=UTF-8
5837 Name=%(filename)s
5838 Type=Link
5839 URL=%(url)s
5840 Icon=text-html
5841 '''.lstrip()
5842
5843
5844 def iri_to_uri(iri):
5845     """
5846     Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5847
5848     The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5849     """
5850
5851     iri_parts = compat_urllib_parse_urlparse(iri)
5852
5853     if '[' in iri_parts.netloc:
5854         raise ValueError('IPv6 URIs are not, yet, supported.')
5855         # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5856
5857     # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5858
5859     net_location = ''
5860     if iri_parts.username:
5861         net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
5862         if iri_parts.password is not None:
5863             net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
5864         net_location += '@'
5865
5866     net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
5867     # The 'idna' encoding produces ASCII text.
5868     if iri_parts.port is not None and iri_parts.port != 80:
5869         net_location += ':' + str(iri_parts.port)
5870
5871     return compat_urllib_parse_urlunparse(
5872         (iri_parts.scheme,
5873             net_location,
5874
5875             compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
5876
5877             # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
5878             compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
5879
5880             # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
5881             compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
5882
5883             compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5884
5885     # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5886
5887
5888 def to_high_limit_path(path):
5889     if sys.platform in ['win32', 'cygwin']:
5890         # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
5891         return r'\\?\ '.rstrip() + os.path.abspath(path)
5892
5893     return path
5894
5895
5896 def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
5897     val = obj.get(field, default)
5898     if func and val not in ignore:
5899         val = func(val)
5900     return template % val if val not in ignore else default
5901
5902
5903 def clean_podcast_url(url):
5904     return re.sub(r'''(?x)
5905         (?:
5906             (?:
5907                 chtbl\.com/track|
5908                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
5909                 play\.podtrac\.com
5910             )/[^/]+|
5911             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
5912             flex\.acast\.com|
5913             pd(?:
5914                 cn\.co| # https://podcorn.com/analytics-prefix/
5915                 st\.fm # https://podsights.com/docs/
5916             )/e
5917         )/''', '', url)
5918
5919
5920 _HEX_TABLE = '0123456789abcdef'
5921
5922
5923 def random_uuidv4():
5924     return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5925
5926
5927 def make_dir(path, to_screen=None):
5928     try:
5929         dn = os.path.dirname(path)
5930         if dn and not os.path.exists(dn):
5931             os.makedirs(dn)
5932         return True
5933     except (OSError, IOError) as err:
5934         if callable(to_screen) is not None:
5935             to_screen('unable to create directory ' + error_to_compat_str(err))
5936         return False
5937
5938
5939 def get_executable_path():
5940     from zipimport import zipimporter
5941     if hasattr(sys, 'frozen'):  # Running from PyInstaller
5942         path = os.path.dirname(sys.executable)
5943     elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
5944         path = os.path.join(os.path.dirname(__file__), '../..')
5945     else:
5946         path = os.path.join(os.path.dirname(__file__), '..')
5947     return os.path.abspath(path)
5948
5949
5950 def load_plugins(name, type, namespace):
5951     plugin_info = [None]
5952     classes = []
5953     try:
5954         plugin_info = imp.find_module(
5955             name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
5956         plugins = imp.load_module(name, *plugin_info)
5957         for name in dir(plugins):
5958             if not name.endswith(type):
5959                 continue
5960             klass = getattr(plugins, name)
5961             classes.append(klass)
5962             namespace[name] = klass
5963     except ImportError:
5964         pass
5965     finally:
5966         if plugin_info[0] is not None:
5967             plugin_info[0].close()
5968     return classes
5969
5970
5971 def traverse_dict(dictn, keys, casesense=True):
5972     if not isinstance(dictn, dict):
5973         return None
5974     first_key = keys[0]
5975     if not casesense:
5976         dictn = {key.lower(): val for key, val in dictn.items()}
5977         first_key = first_key.lower()
5978     value = dictn.get(first_key, None)
5979     return value if len(keys) < 2 else traverse_dict(value, keys[1:], casesense)