# yt_dlp/utils.py — yt-dlp utility functions
# Snapshot from the yt-dlp git repository
# (commit subject: "[ExtractAudio] Rescale --audio-quality correctly")
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import hashlib
20 import hmac
21 import importlib.util
22 import io
23 import itertools
24 import json
25 import locale
26 import math
27 import operator
28 import os
29 import platform
30 import random
31 import re
32 import socket
33 import ssl
34 import subprocess
35 import sys
36 import tempfile
37 import time
38 import traceback
39 import xml.etree.ElementTree
40 import zlib
41
42 from .compat import (
43 compat_HTMLParseError,
44 compat_HTMLParser,
45 compat_HTTPError,
46 compat_basestring,
47 compat_chr,
48 compat_cookiejar,
49 compat_ctypes_WINFUNCTYPE,
50 compat_etree_fromstring,
51 compat_expanduser,
52 compat_html_entities,
53 compat_html_entities_html5,
54 compat_http_client,
55 compat_integer_types,
56 compat_numeric_types,
57 compat_kwargs,
58 compat_os_name,
59 compat_parse_qs,
60 compat_shlex_quote,
61 compat_str,
62 compat_struct_pack,
63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
66 compat_urllib_parse_urlencode,
67 compat_urllib_parse_urlparse,
68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
74 compat_xpath,
75 )
76
77 from .socks import (
78 ProxyType,
79 sockssocket,
80 )
81
82
def register_socks_protocols():
    """Teach urlparse that SOCKS URL schemes carry a network location.

    Works around https://bugs.python.org/issue7904 (urlsplit() in old
    Pythons mishandles URLs whose scheme is not listed in
    ``urlparse.uses_netloc``): any SOCKS scheme not already registered
    is appended to that list.
    """
    registered = compat_urlparse.uses_netloc
    # Preserve registration order; skip schemes that are already present.
    missing = [scheme for scheme in ('socks', 'socks4', 'socks4a', 'socks5')
               if scheme not in registered]
    registered.extend(missing)
90
91
# This is not clearly defined otherwise
# Type of a compiled regular expression object, obtained indirectly so it
# works on every supported interpreter (used for isinstance() checks).
compiled_regex_type = type(re.compile(''))
94
95
def random_user_agent():
    """Return a Chrome-on-Windows User-Agent string with a randomly
    chosen Chrome version.

    The platform/WebKit parts of the UA are fixed (Windows NT 10.0,
    Win64/x64, AppleWebKit 537.36); only the ``Chrome/<version>`` token
    varies, picked uniformly from the hard-coded list below.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Hard-coded snapshot of real Chrome build numbers (68.x – 76.x era).
    # Kept as a tuple literal so the candidate pool is fixed and auditable.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    # Uniform pick; substituted into the %s slot of the template.
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
# Default headers sent with every HTTP request; the User-Agent is
# randomized once per process by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1686
1687
# Named alternative User-Agent strings that callers can select explicitly
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1691
1692
# Sentinel used to distinguish "no default supplied" from an explicit
# default value of None (see the xpath_* helpers below)
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Full month names keyed by language code, for parsing localized dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1705
# Media file extensions recognized by the extractors/format logic
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1720
# Maps accented characters to ASCII replacements; needed for sanitizing
# filenames in restricted mode (see sanitize_filename)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
# Candidate strptime() formats tried when parsing free-form date strings
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Extra formats for locales that write the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Extra formats for locales that write the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the trailing argument list of "packed" (obfuscated) JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches <script type="application/ld+json"> blocks; the JSON payload is
# captured in the named group 'json_ld'
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1791
1792
def preferredencoding():
    """Return the best guess for the system's preferred text encoding.

    Uses locale.getpreferredencoding() when it names a usable codec,
    falling back to UTF-8 otherwise.
    """
    encoding = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        'TEST'.encode(candidate)  # reject names that are not valid codecs
    except Exception:
        pass
    else:
        encoding = candidate
    return encoding
1806
1807
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Write to a temporary file in the *target* directory so the final
    # os.rename() below is a same-filesystem (atomic on POSIX) replace
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files with restrictive permissions;
            # widen to the default (0666 honoring the process umask)
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Clean up the temporary file before re-raising
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1866
1867
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # The attribute name is interpolated into the XPath expression, so
        # restrict it to simple names to avoid building an invalid query
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Fallback: filter matching elements manually, since older
        # ElementTree versions cannot evaluate attribute predicates
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
1886
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an XPath into '{uri}tag' form using ns_map."""
    expanded = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1897
1898
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a string, or an iterable of
    candidate strings tried in order).

    When nothing matches: return `default` if one was supplied, raise
    ExtractorError if `fatal`, otherwise return None.
    """
    def _search(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        found = _search(xpath)
    else:
        for candidate in xpath:
            found = _search(candidate)
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
1920
1921
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if elem is None or elem == default:
        return elem
    if elem.text is not None:
        return elem.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1935
1936
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element matching xpath[@key],
    honoring `default`/`fatal` like the other xpath helpers."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % (
            '%s[@%s]' % (xpath, key) if name is None else name))
    return None
1948
1949
def get_element_by_id(id, html):
    """Return the inner content of the tag whose id attribute equals `id`."""
    return get_element_by_attribute('id', id, html)
1953
1954
def get_element_by_class(class_name, html):
    """Return the inner content of the first tag carrying the given class, or None."""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
1959
1960
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner content of the first tag with attribute=value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1964
1965
def get_elements_by_class(class_name, html):
    """Return the inner contents of all tags carrying the given class, as a list."""
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1971
1972
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner contents of every tag whose `attribute` matches `value`."""

    if escape_value:
        value = re.escape(value)

    results = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = m.group('content')

        # Strip surrounding quotes if the captured content is quoted
        if content.startswith(('"', "'")):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1996
1997
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser that records the attributes of elements fed to it;
    intended to be fed a single element (see extract_attributes)."""

    def __init__(self):
        self.attrs = {}
        super().__init__()

    def handle_starttag(self, tag, attrs):
        # Each start tag overwrites the previous attributes
        self.attrs = dict(attrs)
2007
2008
def extract_attributes(html_element):
    """Parse a string containing a single HTML element and return its
    attributes as a dict.

    Attribute names are lower-cased, entities are decoded, valueless
    attributes map to None and empty values map to ''. For example,
    '<el a="foo" B="bar" empty= noval entity="&amp;">' yields
    {'a': 'foo', 'b': 'bar', 'empty': '', 'noval': None, 'entity': '&'}.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Pythons raise on malformed HTML; keep whatever was gathered
        pass
    return parser.attrs
2033
2034
def clean_html(html):
    """Clean an HTML snippet into readable plain text (None passes through)."""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = html.replace('\n', ' ')
    for pattern, replacement in (
            # <br> and paragraph boundaries become newlines
            (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
            (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
            # drop all remaining tags
            ('<.*?>', ''),
    ):
        html = re.sub(pattern, replacement, html)
    # Decode entities last, once all markup is gone
    return unescapeHTML(html).strip()
2050
2051
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout in binary mode so written bytes are not mangled
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            # Permission errors will not be fixed by renaming; give up early
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2082
2083
def timeconvert(timestr):
    """Convert an RFC 2822 date string into a Unix timestamp (None on failure)."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2091
2092
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitize a string so it can be used as (part of) a filename.

    @param restricted  Use a stricter, ASCII-only subset of allowed characters
    @param is_id       The value is an identifier: skip the cosmetic cleanup
                       (underscore collapsing etc.) so it survives intact
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if not restricted and char == '\n':
            return ' '
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Keep timestamps like 12:34:56 readable: use '_' rather than ' -'
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if not is_id:
        # Collapse runs of underscores produced by the substitutions above
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2136
2137
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows.

    On other platforms the path is returned unchanged, unless `force` is
    set, in which case Windows-style sanitization is applied anyway.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters Windows forbids in path components, plus trailing
    # spaces/dots; '.' and '..' components are kept as-is
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Preserve absolute paths when force-sanitizing on non-Windows
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2161
2162
def sanitize_url(url):
    """Normalize a URL: give protocol-relative URLs an http: scheme and fix
    a few common scheme typos seen in the wild."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://'),
    ):
        fixed, count = re.subn(mistake, fixup, url)
        if count:
            return fixed
    return url
2179
2180
def extract_basic_auth(url):
    """Strip inline userinfo from `url`.

    Returns (clean_url, authorization_header_value) where the header value
    is None when the URL carried no credentials.
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    netloc = (parts.hostname if parts.port is None
              else '%s:%d' % (parts.hostname, parts.port))
    clean_url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
    return clean_url, 'Basic ' + token
2191
2192
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after sanitizing/escaping the URL and moving
    any inline basic-auth credentials into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2199
2200
def expand_path(s):
    """Expand '~' and environment variables in a path string."""
    user_expanded = compat_expanduser(s)
    return os.path.expandvars(user_expanded)
2204
2205
def orderedSet(iterable):
    """Return a list of the items of `iterable` with duplicates removed,
    preserving first-seen order (works for unhashable items too)."""
    result = []
    for item in iterable:
        if item in result:
            continue
        result.append(item)
    return result
2213
2214
def _htmlentity_transform(entity_with_semicolon):
    """Transform one HTML entity (including its trailing ';') into a character."""
    entity = entity_with_semicolon[:-1]

    # Known named (non-numeric) HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        digits = numeric.group(1)
        if digits.startswith('x'):
            base = 16
            digits = '0%s' % digits
        else:
            base = 10
        # int()/chr() may still fail for out-of-range codepoints
        # (see https://github.com/ytdl-org/youtube-dl/issues/7518)
        try:
            return compat_chr(int(digits, base))
        except ValueError:
            pass

    # Unknown entity: keep its literal representation
    return '&%s;' % entity
2244
2245
def unescapeHTML(s):
    """Replace all HTML entities in `s` with their characters.

    Returns None when given None (convenient for optional fields).
    """
    if s is None:
        return None
    # isinstance (rather than an exact type() comparison) is the idiomatic
    # check and also accepts str subclasses, which behave identically here
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2253
2254
def escapeHTML(text):
    """Escape &, <, >, double and single quotes for safe HTML embedding."""
    replacements = (
        ('&', '&amp;'),  # must run first so later entities are not re-escaped
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for char, entity in replacements:
        text = text.replace(char, entity)
    return text
2264
2265
def process_communicate_or_kill(p, *args, **kwargs):
    """communicate() with process `p`; on any exception (including
    KeyboardInterrupt) kill the process, reap it and re-raise."""
    try:
        stdout_stderr = p.communicate(*args, **kwargs)
    except BaseException:
        p.kill()
        p.wait()
        raise
    return stdout_stderr
2273
2274
class Popen(subprocess.Popen):
    """subprocess.Popen that hides the console window on Windows and adds
    a communicate_or_kill() helper."""

    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None  # no special startup handling elsewhere

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        """communicate(), killing the process on any exception."""
        return process_communicate_or_kill(self, *args, **kwargs)
2287
2288
def get_subprocess_encoding():
    """Return the encoding to use when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return encoding if encoding is not None else 'utf-8'
2299
2300
def encodeFilename(s, for_subprocess=False):
    """Encode a filename for the current platform/Python version.

    @param s The name of the file (must be a text string)
    """

    assert type(s) == compat_str

    if sys.version_info >= (3, 0):
        # Python 3 has a Unicode file API
        return s

    if sys.platform.startswith('java'):
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        return s

    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2323
2324
def decodeFilename(b, for_subprocess=False):
    """Decode a filename coming from the OS back into text (no-op on Python 3)."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2334
2335
def encodeArgument(s):
    """Encode a command-line argument for passing to a subprocess."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, for_subprocess=True)
2343
2344
def decodeArgument(b):
    """Decode a command-line argument obtained from a subprocess."""
    return decodeFilename(b, for_subprocess=True)
2347
2348
def decodeOption(optval):
    """Decode a command-line option value into text using the preferred encoding."""
    if optval is None:
        return None
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(decoded, compat_str)
    return decoded
2357
2358
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a millisecond count into an (hours, minutes, seconds, milliseconds) tuple."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
2367
2368
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H<delim>]M<delim>SS, optionally
    appending '.mmm' milliseconds."""
    hours, minutes, seconds, milliseconds = timetuple_from_msec(secs * 1000)
    if hours:
        formatted = '%d%s%02d%s%02d' % (hours, delim, minutes, delim, seconds)
    elif minutes:
        formatted = '%d%s%02d' % (minutes, delim, seconds)
    else:
        formatted = '%d' % seconds
    if msec:
        formatted = '%s.%03d' % (formatted, milliseconds)
    return formatted
2378
2379
def _ssl_load_windows_store_certs(ssl_context, storename):
    """Load server-auth x509 certificates from the given Windows certificate
    store into ssl_context, skipping certificates that fail to load.

    Code adapted from _load_windows_store_certs in
    https://github.com/python/cpython/blob/main/Lib/ssl.py
    """
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        try:
            ssl_context.load_verify_locations(cadata=cert)
        except ssl.SSLError:
            # Ignore individual bad certificates rather than failing the store
            pass
2393
2394
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler whose SSL context honours the
    'nocheckcertificate' option in params."""
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                # Create a new context to discard any certificates that were already loaded
                context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2416
2417
def bug_reports_message(before=';'):
    """Return the standard bug-report blurb, appended after the text `before`."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = (
        'please report this issue on https://github.com/yt-dlp/yt-dlp .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
    ) % update_cmd

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Starting a new sentence: capitalize the first word
        msg = msg[0].title() + msg[1:]
    return (before + ' ' if before else '') + msg
2432
2433
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors; catch this to handle any of them."""
    pass
2437
2438
# Exception types that indicate network-level failures; ExtractorError
# treats these as "expected" rather than as bugs
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2443
2444
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            # Network failures are never treated as internal bugs
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        message = ''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message()))
        super().__init__(message)

    def format_traceback(self):
        """Render the stored traceback as a string, or None if absent."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2474
2475
class UnsupportedError(ExtractorError):
    """Raised when no extractor handles the given URL."""

    def __init__(self, url):
        self.url = url
        super().__init__('Unsupported URL: %s' % url, expected=True)
2481
2482
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2486
2487
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    `countries`, if given, is kept on the instance for callers to inspect.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True  # a geo block is site policy, not a bug
        super().__init__(msg, **kwargs)
        self.countries = countries
2499
2500
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """exc_info, if given, is the original exception that caused the
        trouble (as returned by sys.exc_info())."""
        super().__init__(msg)
        self.exc_info = exc_info
2513
2514
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2522
2523
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2531
2532
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    Raised by a PostProcessor's run() method to indicate an error in the
    postprocessing task.
    """

    def __init__(self, msg):
        super().__init__(msg)
        self.msg = msg  # kept for callers that read .msg directly
2543
2544
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg  # override the class-level default message
        super().__init__(self.msg)
2553
2554
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Overrides DownloadCancelled.msg
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
2558
2559
class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    # Overrides DownloadCancelled.msg
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
2563
2564
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Overrides DownloadCancelled.msg
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2568
2569
class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
    pass
2573
2574
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2582
2583
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised when a downloaded file is smaller than what the server announced,
    indicating the connection was probably interrupted. Both `downloaded`
    and `expected` are byte counts.
    """

    def __init__(self, downloaded, expected):
        super().__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
2599
2600
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails; classifies the
    failure in `.reason` ('NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED')."""

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Derive a coarse, machine-readable reason from the errno/message
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2615
2616
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no usable mechanism for writing xattrs is available."""
    pass
2619
2620
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate `http_class`, honouring yt-dlp networking params.

    Applies the handler's `source_address` param by binding outgoing
    connections to that local address and filtering getaddrinfo() results
    to the matching address family.  Also works around several Python 2
    quirks.  Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dotted source address is assumed to be IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate address until one connects
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)  # port 0: let the OS pick the local port
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2684
2685
def handle_youtubedl_headers(headers):
    """Process internal 'Youtubedl-*' pseudo-headers before a real request.

    When 'Youtubedl-no-compression' is present, return a copy of `headers`
    with any Accept-Encoding header (matched case-insensitively) and the
    marker itself removed; otherwise return `headers` unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    # NB: plain dict() call instead of a dict comprehension, matching the
    # file's Python 2.6 compatibility convention
    filtered = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    filtered.pop('Youtubedl-no-compression', None)
    return filtered
2694
2695
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open an HTTP connection, optionally routed through a SOCKS proxy
        given by the internal 'Ytdl-socks-proxy' pseudo-header."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying raw deflate first and falling
        back to zlib-wrapped deflate (some servers send either form)."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Add any standard header not already present on the request
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decompress gzip/deflate responses and re-escape
        the Location header of redirects to satisfy RFC 3986."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2819
2820
def make_socks_conn_class(base_class, socks_proxy):
    """Return a `base_class` subclass whose connect() tunnels through the
    SOCKS proxy described by the `socks_proxy` URL (socks/socks4/socks4a/socks5).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        return compat_urllib_parse_unquote_plus(s) if s else s

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS connections, wrap the proxied socket with TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2862
2863
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that honours yt-dlp params and the internal
    'Ytdl-socks-proxy' pseudo-header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class

        # Route through a SOCKS proxy when requested via the pseudo-header
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        open_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            open_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            open_kwargs['check_hostname'] = self._check_hostname

        return self.do_open(
            functools.partial(_create_http_connection, self, conn_class, True),
            req, **open_kwargs)
2887
2888
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Netscape-format cookie jar with UTF-8 support and tolerant loading.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in Netscape cookie files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields per cookie entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Field layout of one line of a Netscape cookie file
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file, skipping (with a warning) entries that
        cannot be parsed rather than failing the whole load."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate one raw line; raises LoadError for malformed entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
3005
3006
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same jar handling to both HTTP
    and HTTPS requests/responses."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE(review): the workaround below is intentionally disabled; kept
        # for reference.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
3029
3030
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # All redirect codes (incl. 308) reuse the stock 302 implementation
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Refuse to redirect method/code combinations that are not safe
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3086
3087
def extract_timezone(date_str):
    """Split a trailing timezone off `date_str`.

    Returns a (timezone, remaining_str) tuple where `timezone` is a
    datetime.timedelta (zero when no offset or just 'Z' is found) and
    `remaining_str` is `date_str` with any recognized suffix removed.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    timezone = datetime.timedelta()
    if m:
        # Strip the matched suffix even when it is only a 'Z'
        date_str = date_str[:-len(m.group('tz'))]
        if m.group('sign'):
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str
3112
3113
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date, or None on failure.

    `delimiter` separates the date and time parts; when `timezone` is
    None, the offset is extracted from the string itself.
    """
    if date_str is None:
        return None

    # Fractional seconds cannot be expressed in the strptime format below
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
3131
3132
def date_formats(day_first=True):
    """Return the candidate date format strings, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3135
3136
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Commas, AM/PM markers and timezones only get in strptime's way
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    upload_date = None
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
3163
3164
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string, or None.

    day_first: whether ambiguous numeric dates are treated day-first.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Remember a PM marker so 12-hour clock times can be shifted below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Last resort: RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3196
3197
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from `url`, falling back to `default_ext`."""
    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = guess.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3209
3210
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by swapping in '<lang>.<format>' as extension."""
    lang_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, lang_ext, expected_real_ext)
3213
3214
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Resolve the base date recursively, then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years have variable length; handled by datetime_add_months
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3255
3256
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3265
3266
def datetime_add_months(dt, months):
    """Shift `dt` by `months` months (negative allowed), clamping the day
    to the target month's length (e.g. Jan 31 + 1 month -> Feb 28/29)."""
    total_months = dt.month - 1 + months
    year = dt.year + total_months // 12
    month = total_months % 12 + 1
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3274
3275
def datetime_round(dt, precision='day'):
    """
    Round `dt`'s time to the nearest `precision`
    ('microsecond' leaves it untouched; 'second'/'minute'/'hour'/'day').
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half up to the nearest unit boundary
    rounded = ((timestamp + unit_seconds // 2) // unit_seconds) * unit_seconds
    return datetime.datetime.utcfromtimestamp(rounded)
3292
3293
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    any other input is returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
3302
3303
class DateRange(object):
    """Represents a closed time interval between two dates."""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3333
3334
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Decode the (rare) case of a bytes result
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3343
3344
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3351
3352
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map Python fileno (stdout=1, stderr=2) to GetStdHandle ids
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is only a console if it is a character device for which
        # GetConsoleMode succeeds
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write BMP runs in chunks; non-BMP chars go out one surrogate pair
        # (2 UTF-16 code units) at a time
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3426
3427
def write_string(s, out=None, encoding=None):
    """Write text string `s` to `out` (default sys.stderr), handling
    Windows consoles, byte streams and streams with an underlying buffer."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the Win32 console API, which handles Unicode correctly
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with a byte buffer: encode explicitly and write bytes
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3448
3449
def bytes_to_intlist(bs):
    """Return the byte values of `bs` as a list of ints, accepting both
    Python 3 bytes-like input and Python 2 str input."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    return [ord(c) for c in bs]
3457
3458
def intlist_to_bytes(xs):
    """Pack a sequence of ints (0-255) back into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('{0}B'.format(len(xs)), *xs)
3463
3464
# Cross-platform file locking: define _lock_file/_unlock_file for the
# current platform (Win32 LockFileEx, POSIX fcntl, or a failing stub).
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure passed to Lock/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering (practically) the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock the whole file; dwFlags 0x2 requests an exclusive lock
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Release the lock taken by _lock_file (requires its OVERLAPPED ptr)
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3538
3539
class locked_file(object):
    """File wrapper holding an OS-level lock for the duration of a `with` block.

    Mode 'r' takes a shared lock; 'a' and 'w' take an exclusive lock.
    The file is closed when the block exits (or if locking fails).
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Never leak the file handle if the lock could not be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3569
3570
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), falling back to 'utf-8' if unset."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding
3574
3575
def shell_quote(args):
    """Quote each argument for safe interpolation into a shell command line."""
    fs_encoding = get_filesystem_encoding()
    quoted = []
    for arg in args:
        if isinstance(arg, bytes):
            # We may get a filename encoded with 'encodeFilename'
            arg = arg.decode(fs_encoding)
        quoted.append(compat_shlex_quote(arg))
    return ' '.join(quoted)
3585
3586
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, sdata)
3595
3596
def unsmuggle_url(smug_url, default=None):
    """Extract data embedded by smuggle_url; returns (url, data-or-default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    data = json.loads(compat_parse_qs(sdata)['__youtubedl_smuggle'][0])
    return url, data
3604
3605
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.27MiB'.

    Accepts ints, floats, numeric strings, or None (formatted as 'N/A').
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so absurdly large values cannot index past 'YiB'
        # (previously raised IndexError for values >= 1024 ** 9)
        exponent = min(int(math.log(bytes, 1024.0)), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3618
3619
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number><unit>' in *s* using *unit_table*; int or None."""
    units_pattern = '|'.join(re.escape(unit) for unit in unit_table)
    match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_pattern, s)
    if match is None:
        return None
    # Comma is accepted as a decimal separator
    number = float(match.group('num').replace(',', '.'))
    return int(number * unit_table[match.group('unit')])
3629
3630
def parse_filesize(s):
    """Parse a human-readable file size such as '5.5MiB' or '100 KB' into bytes.

    Both decimal (KB, MB, ...) and binary (KiB, MiB, ...) units are accepted.
    Note the quirk visible in the table below: lowercase-prefix forms with a
    capital B ('kB', 'mB', ...) are treated as binary (1024-based) multiples.
    Returns an int, or None when *s* is None or does not parse.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3700
3701
def parse_count(s):
    """Parse a view/like count like '1.2M', '15K' or '1,234' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Purely numeric (possibly with thousands separators)
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, s)
3721
3722
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4K'."""
    if s is None:
        return {}

    dims = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if dims:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    scan = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if scan:
        return {'height': int(scan.group(1))}

    uhd = re.search(r'\b([48])[kK]\b', s)
    if uhd:
        # 4K -> 2160, 8K -> 4320
        return {'height': 540 * int(uhd.group(1))}

    return {}
3743
3744
def parse_bitrate(s):
    """Parse a bitrate like '128 kbps' into an int (kbps); None otherwise."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    if mobj:
        return int(mobj.group(1))
    return None
3751
3752
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name in month_names:
        return month_names.index(name) + 1
    return None
3762
3763
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3772
3773
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Leave already-escaped entities and character references untouched
    entity = r'amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};'
    return re.sub(r'&(?!%s)' % entity, '&amp;', xml_str)
3780
3781
def setproctitle(title):
    """Set the process name (as shown by ps/top) via glibc prctl.

    Best-effort: silently does nothing on Jython, on systems without
    libc.so.6, or when libc lacks prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Buffer must outlive the prctl call; kernel reads from it directly
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3806
3807
def remove_start(s, start):
    """Strip *start* from the beginning of *s*, if present (None-safe)."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3810
3811
def remove_end(s, end):
    """Strip *end* from the end of *s*, if present (None-safe)."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
3814
3815
def remove_quotes(s):
    """Strip one pair of matching single or double quotes, if present."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3823
3824
def get_domain(url):
    """Return the host part of *url* with any 'www.' prefix stripped, or None."""
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not mobj:
        return None
    return mobj.group('domain')
3828
3829
def url_basename(url):
    """Return the last path component of *url* (query/fragment excluded)."""
    path = compat_urlparse.urlparse(url).path
    return path.rstrip('/').rpartition('/')[2]
3833
3834
def base_url(url):
    """Return *url* truncated after the final '/' preceding any '?', '#' or '&'."""
    return re.match(r'https?://[^?#&]+/', url).group(0)
3837
3838
def urljoin(base, path):
    """Join *base* and *path* into an absolute URL; None when not possible."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # Already absolute (scheme or protocol-relative)
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3852
3853
class HEADRequest(compat_urllib_request.Request):
    """A Request that always uses the HTTP HEAD method."""
    def get_method(self):
        return 'HEAD'
3857
3858
class PUTRequest(compat_urllib_request.Request):
    """A Request that always uses the HTTP PUT method."""
    def get_method(self):
        return 'PUT'
3862
3863
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int (scaled by invscale/scale); *default* on failure.

    If *get_attr* is given, the named attribute of *v* is converted instead.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
3876
3877
def str_or_none(v, default=None):
    """compat_str(v), or *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
3880
3881
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and an optional leading '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3889
3890
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float (scaled by invscale/scale); *default* on failure."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3898
3899
def bool_or_none(v, default=None):
    """Return *v* only when it is a genuine bool; otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
3902
3903
def strip_or_none(v, default=None):
    """v.strip() when *v* is a string; *default* for anything else."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3906
3907
def url_or_none(url):
    """Return the stripped URL if it looks like a supported absolute URL."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    # http(s), rtmp(t)(e/s), rtmfp, rtsp(s/u), mms, ftp(s), protocol-relative
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3913
3914
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or 'YYYYMMDD' string; *default* on failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None
        # dt is None for unsupported types -> AttributeError -> default
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3925
3926
def parse_duration(s):
    """Parse a duration string into seconds (float), or None if unrecognized.

    Accepts clock notation ('1:23:45.67'), unit-suffixed / ISO 8601-like
    forms ('3h 2min 5s', 'PT1H2M3S'), and plain 'X hours' / 'Y minutes'.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1. Clock format: [[[DD:]HH:]MM:]SS[.ms], optional trailing 'Z'
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2. Unit-suffixed / ISO 8601-like format. Years, months and weeks
        # are matched (so such strings still parse) but not captured, so
        # their values never contribute to the result.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3. Plain free text: 'X hours' or 'Y minutes' (possibly decimal)
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up all components that actually matched; 'ms' retains its leading
    # dot ('.5'), so float() yields the fractional part directly
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3983
3984
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension ('a.mp4' -> 'a.temp.mp4').

    When *expected_real_ext* is given and does not match the actual
    extension, *ext* is appended after the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3991
3992
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    When *expected_real_ext* is given and does not match the actual
    extension, *ext* is appended after the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3998
3999
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate_or_kill()
    except OSError:
        return False
    return exe
4008
4009
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = proc.communicate_or_kill()[0]
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
4026
4027
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Search *output* for a version number; *unrecognized* when not found."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
4037
4038
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    class IndexError(IndexError):
        # Subclasses builtin IndexError so callers may catch either the
        # specific LazyList.IndexError or a plain IndexError
        pass

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        self.__cache = []  # items consumed from the iterable so far
        self.__reversed = False  # logical (not physical) reversal flag

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # Replay cached items first, then keep consuming (and caching)
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        self.__cache.extend(self.__iterable)
        # Discard the emptied iterable to make it pickle-able
        self.__iterable = []
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map an index in the reversed view to one in the cache: 0 -> -1, 1 -> -2, ...
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Otherwise consume only as many items as the index/slice requires
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Truthiness only needs the first element of the current view
        try:
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def reverse(self):
        # In-place logical reversal; returns self to allow chaining
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4125
4126
class PagedList:
    """Abstract base for paginated result lists.

    Subclasses provide _getslice(); pages are fetched via *pagefunc* and
    cached per page number when *use_cache* is enabled.
    """
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc  # pagenum -> iterable of entries
        self._pagesize = pagesize  # entries per page
        self._use_cache = use_cache
        self._cache = {}  # pagenum -> list of entries

    def getpage(self, pagenum):
        # Fetch a single page as a list, reusing the cache when possible
        page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
4156
4157
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages one at a time, only as they are needed."""
    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested range within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4191
4192
class InAdvancePagedList(PagedList):
    """PagedList for sources where the total page count is known up front."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize  # offset into first page
        only_more = None if end is None else end - start  # entries still wanted
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    # Final partial page: yield just what is needed and stop
                    yield from page_results[:only_more]
                    break
            yield from page_results
4216
4217
def uppercase_escape(s):
    """Decode literal \\UXXXXXXXX escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
4224
4225
def lowercase_escape(s):
    """Decode literal \\uXXXX escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
4232
4233
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # Reserved and sub-delimiter characters are deliberately left unescaped
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4239
4240
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment)
    ).geturl()
4251
4252
def parse_qs(url):
    """Parse the query string of *url* into a dict of value lists."""
    query = compat_urllib_parse_urlparse(url).query
    return compat_parse_qs(query)
4255
4256
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping comments and blank lines.

    The file object is closed before returning.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM that may appear raw or already decoded as U+FEFF
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4274
4275
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and ASCII-encode it for use as a POST body."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4278
4279
def update_url_query(url, query):
    """Return *url* with the parameters in *query* merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    merged_qs = compat_parse_qs(parsed.query)
    merged_qs.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(merged_qs, True)))
4288
4289
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its URL, data, headers or query.

    The HTTP method (HEAD/PUT/GET...) of the original request is preserved.
    """
    new_headers = req.headers.copy()
    new_headers.update(headers)
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        update_url_query(url or req.get_full_url(), query),
        data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4308
4309
def _multipart_encode_impl(data, boundary):
    """Encode *data* (dict of str/bytes pairs) as multipart/form-data.

    Returns (payload_bytes, content_type). Raises ValueError if *boundary*
    occurs inside any encoded field; the caller retries with a new boundary.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    # Accumulate parts in a list and join once; repeated bytes += is quadratic
    parts = []
    for k, v in data.items():
        parts.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(content)

    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
4330
4331
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    # Retry with fresh random boundaries until none collides with the data
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            boundary = None
4360
4361
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable value in *d* for the given key or keys.

    Values that are None (or falsy, unless skip_false_values=False) are
    skipped when a list/tuple of keys is given.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d or d[key] is None:
            continue
        if skip_false_values and not d[key]:
            continue
        return d[key]
    return default
4370
4371
def try_get(src, getter, expected_type=None):
    """Apply each getter to *src*, returning the first result that neither
    raises a lookup error nor fails the *expected_type* check."""
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
4381
4382
def merge_dicts(*dicts):
    """Merge dicts left to right; the first non-None value for a key wins,
    except that a later non-empty string replaces an earlier empty string."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            replaces_empty_str = (
                isinstance(v, compat_str) and v
                and isinstance(merged.get(k), compat_str)
                and not merged[k])
            if k not in merged or replaces_empty_str:
                merged[k] = v
    return merged
4395
4396
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding bytes with *encoding* if needed."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4399
4400
# US MPAA-style content rating -> minimum viewer age used as the age limit
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines rating -> age limit (general-audience ratings
# TV-G and TV-PG map to 0, i.e. no restriction)
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4418
4419
def parse_age_limit(s):
    """Normalize an age limit ('18', '18+', 'PG-13', 'TV-MA' or int) to an int."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4435
4436
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, returning only the JSON payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4445
4446
def js_to_json(code, vars={}):
    """Convert a JavaScript object/expression string into valid JSON.

    vars is a dict of var, val pairs to substitute for bare identifiers.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals, optionally followed by ':' (object keys)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched JS token into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to JSON form (single quotes, \x, ...)
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integer object keys must be quoted in JSON
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Bare identifiers become JSON strings
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4493
4494
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4503
4504
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Filename infix for each auxiliary file type; None appears to mean the type
# has no fixed infix (NOTE(review): inferred from the values — confirm
# against the template-expansion code elsewhere in the project)
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4521
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Template regex for %-style format fields; {0} is the key pattern and
# {1} the conversion-type pattern, filled in via str.format by callers
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
    '''


# All conversion-type characters accepted by %-style formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4540
4541
def limit_length(s, length):
    """Truncate *s* to at most *length* characters, ending in '...' when cut.

    Returns None for None input.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        # max() guards against lengths shorter than the ellipsis itself,
        # which previously produced output longer than *length*
        return s[:max(length - len(ELLIPSES), 0)] + ELLIPSES
    return s
4550
4551
def version_tuple(v):
    """Split a version string like '1.2.3' or '1.2-3' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4554
4555
def is_outdated_version(version, limit, assume_new=True):
    """True when *version* < *limit*; missing/unparseable -> not assume_new."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4563
4564
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    # Local import — presumably avoids a circular dependency with .update;
    # confirm before moving to module level
    from .update import is_non_updateable

    return not is_non_updateable()
4571
4572
def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(map(compat_shlex_quote, args))
4576
4577
def error_to_compat_str(err):
    """str(err), decoded with the preferred encoding on Python 2."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
4585
4586
def mimetype2ext(mt):
    """Map a MIME type (optionally with parameters) to a file extension.

    Lookup order: exact type match, then subtype, then '+suffix'; falls back
    to the subtype itself with '+' replaced by '.'. Returns None for None.
    """
    if mt is None:
        return None

    # Strip parameters such as '; charset=utf-8'
    mimetype = mt.partition(';')[0].strip()

    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }
    if mimetype in FULL_MAP:
        return FULL_MAP[mimetype]

    subtype = mimetype.rpartition('/')[2]
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }
    if subtype.lower() in SUBTYPE_MAP:
        return SUBTYPE_MAP[subtype.lower()]

    suffix = subtype.partition('+')[2]
    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }
    if suffix in SUFFIX_MAP:
        return SUFFIX_MAP[suffix]

    return subtype.replace('+', '.')
4649
4650
def parse_codecs(codecs_str):
    """Split an RFC 6381 "codecs" attribute value into its video codec,
    audio codec and dynamic-range metadata.

    Returns a dict with 'vcodec', 'acodec' (either may be 'none') and
    'dynamic_range', or {} if nothing could be parsed.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec, hdr = None, None, None
    for full_codec in split_codecs:
        parts = full_codec.split('.')
        # Drop zeros so that e.g. 'av01' is recognized as 'av1'
        codec = parts[0].replace('0', '')
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                     'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
            if not vcodec:
                # vp9/av1 carry profile info in the first four dot-parts only
                vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
                if codec in ('dvh1', 'dvhe'):
                    # Dolby Vision
                    hdr = 'DV'
                elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
                    # av1.<profile>.<level>.<bit depth>: 10-bit implies HDR10
                    hdr = 'HDR10'
                elif full_codec.replace('0', '').startswith('vp9.2'):
                    # vp9 profile 2 implies HDR10
                    hdr = 'HDR10'
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: with exactly two entries, assume video + audio
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
        }
    return {}
4689
4690
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for a URL response, preferring the
    Content-Disposition filename and falling back to the Content-Type."""
    headers = url_handle.headers

    cd = headers.get('Content-Disposition')
    if cd:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4703
4704
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 'data:' URI with a base64-encoded payload."""
    payload = base64.b64encode(data).decode('ascii')
    return f'data:{mime_type};base64,{payload}'
4707
4708
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No age limit configured, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4717
4718
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Byte-order marks and the encodings they imply
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]

    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4737
4738
def determine_protocol(info_dict):
    """Deduce the download protocol of an info_dict entry.

    An explicit 'protocol' field wins; otherwise the URL prefix and
    extension are examined, falling back to the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = sanitize_url(info_dict['url'])
    for scheme in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(scheme):
            return scheme

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4759
4760
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """
    def width(string):
        # Visible width: terminal escape sequences take no columns
        return len(remove_terminal_sequences(string))

    def get_max_lens(table):
        # Widest visible value per column
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep only the cells whose corresponding filter entry is truthy
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # Drop columns whose data cells are all empty (max width 0)
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extraGap += 1
    if delim:
        # Insert a separator row of `delim` characters under the header;
        # zeroing the last width avoids trailing padding on each line
        table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data
        max_lens[-1] = 0
    for row in table:
        for pos, text in enumerate(map(str, row)):
            row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap))
    ret = '\n'.join(''.join(row) for row in table)
    return ret
4788
4789
def _match_one(filter_part, dct, incomplete):
    """Evaluate a single filter expression (e.g. 'duration > 60' or
    '!is_live') against dct.

    When `incomplete` is true, comparisons against missing fields pass.
    Raises ValueError for unparsable filters or when a string-only
    operator is used with a numeric value.
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        m = m.groupdict()
        unnegated_op = COMPARISON_OPERATORS[m['op']]
        if m['negation']:
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        # The alternation guarantees exactly one of these matched.
        # (A dangling `or m['intval']` was removed here: the regex has no
        # 'intval' group, so evaluating it would raise KeyError.)
        comparison_value = m['quotedstrval'] or m['strval']
        if m['quote']:
            # Unescape quotes of the kind that delimited the value
            comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
        actual_value = dct.get(m['key'])
        numeric_comparison = None
        if isinstance(actual_value, compat_numeric_types):
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082)
            try:
                numeric_comparison = int(comparison_value)
            except ValueError:
                numeric_comparison = parse_filesize(comparison_value)
                if numeric_comparison is None:
                    numeric_comparison = parse_filesize(f'{comparison_value}B')
                if numeric_comparison is None:
                    numeric_comparison = parse_duration(comparison_value)
        if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
            raise ValueError('Operator %s only supports string values!' % m['op'])
        if actual_value is None:
            # Missing field: pass when metadata is incomplete or the
            # operator carried the '?' none-inclusive marker
            return incomplete or m['none_inclusive']
        return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if incomplete and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4865
4866
def match_str(filter_str, dct, incomplete=False):
    """Filter a dictionary with a simple string syntax.

    Returns True when every '&'-separated condition passes. When
    incomplete is set, conditions on missing fields pass.
    """
    parts = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(part.replace(r'\&', '&'), dct, incomplete)
        for part in parts)
4874
4875
def match_filter_func(filter_str):
    """Build a --match-filter callable: returns None when the info dict
    passes filter_str, otherwise a skip-reason message."""
    def _match_func(info_dict, *args, **kwargs):
        if match_str(filter_str, info_dict, *args, **kwargs):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4884
4885
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float).

    Supports plain offsets ('12', '12.5s') and clock times
    ('HH:MM:SS.fff' or 'HH:MM:SS:frac'). Returns None for empty or
    unrecognized expressions.
    """
    if not time_expr:
        return None

    match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if match:
        return float(match.group('time_offset'))

    match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if match:
        hours, minutes, seconds = match.groups()
        # A ':' before the fraction is treated like a decimal point
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4897
4898
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % tuple(timetuple)
4901
4902
def ass_subtitles_timecode(seconds):
    """Format a number of seconds as an ASS timestamp (H:MM:SS.cc,
    i.e. with centisecond precision)."""
    *hms, msec = timetuple_from_msec(seconds * 1000)
    return '%01d:%02d:%02d.%02d' % (*hms, msec / 10)
4906
4907
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Rewrite legacy TTAF/TTML namespaces to the modern ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that are translated into SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}          # style id -> resolved style properties
    default_style = {}   # style inherited from the body/div element

    class TTMLPElementParser(object):
        # Streaming target for XMLParser: converts one <p> subtree into
        # SRT-flavoured markup (<b>/<i>/<u>/<font>)
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the parent
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close markup opened by the matching start() call
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and re-feed it through the streaming parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat until every parent reference has
    # been seen (handles styles declared after their children)
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style attached to body/div acts as the document default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
5070
5071
def cli_option(params, command_option, param):
    """Return [command_option, value] when params[param] is set (not None),
    otherwise [].

    The value is always stringified so that the result is safe to pass
    as a subprocess argument list.
    """
    param = params.get(param)
    # Convert every non-None value. The previous `if param:` guard skipped
    # the compat_str conversion for falsy values such as 0 or '', leaking
    # non-string objects into the returned argv fragment.
    return [command_option, compat_str(param)] if param is not None else []
5077
5078
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean param as CLI arguments.

    Returns [] when unset; otherwise either ['opt', 'value'] or, with a
    separator, a single joined 'opt<sep>value' argument.
    """
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return ['%s%s%s' % (command_option, separator, value)]
    return [command_option, value]
5087
5088
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value,
    otherwise []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5092
5093
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the argument list for the first matching key (or key group)
    from argdict.

    argdict may be a plain list/tuple for backward compatibility, in
    which case it is returned verbatim when use_compat is set.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        # Collect the arg lists of every key in the group that is present
        matches = [argdict.get(key.lower()) for key in variadic(key_list)]
        matches = [args for args in matches if args is not None]
        if matches:
            return list(itertools.chain.from_iterable(matches))
    return default
5112
5113
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve per-executable configuration args, e.g. for postprocessors.

    Builds lookup keys of the form '<main_key>+<exe><suffix>' (or just
    '<exe><suffix>' when both names coincide) and delegates to
    cli_configuration_args.
    """
    main_key, exe = main_key.lower(), exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    prefixed = [f'{root_key}{key}' for key in (keys or [''])]
    if root_key not in prefixed:
        # No bare root key requested: the legacy list form does not apply
        use_compat = False
    else:
        if main_key != exe:
            prefixed.append((main_key, exe))
        prefixed.append('default')
    return cli_configuration_args(argdict, prefixed, default, use_compat)
5125
5126
5127 class ISO639Utils(object):
5128 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5129 _lang_map = {
5130 'aa': 'aar',
5131 'ab': 'abk',
5132 'ae': 'ave',
5133 'af': 'afr',
5134 'ak': 'aka',
5135 'am': 'amh',
5136 'an': 'arg',
5137 'ar': 'ara',
5138 'as': 'asm',
5139 'av': 'ava',
5140 'ay': 'aym',
5141 'az': 'aze',
5142 'ba': 'bak',
5143 'be': 'bel',
5144 'bg': 'bul',
5145 'bh': 'bih',
5146 'bi': 'bis',
5147 'bm': 'bam',
5148 'bn': 'ben',
5149 'bo': 'bod',
5150 'br': 'bre',
5151 'bs': 'bos',
5152 'ca': 'cat',
5153 'ce': 'che',
5154 'ch': 'cha',
5155 'co': 'cos',
5156 'cr': 'cre',
5157 'cs': 'ces',
5158 'cu': 'chu',
5159 'cv': 'chv',
5160 'cy': 'cym',
5161 'da': 'dan',
5162 'de': 'deu',
5163 'dv': 'div',
5164 'dz': 'dzo',
5165 'ee': 'ewe',
5166 'el': 'ell',
5167 'en': 'eng',
5168 'eo': 'epo',
5169 'es': 'spa',
5170 'et': 'est',
5171 'eu': 'eus',
5172 'fa': 'fas',
5173 'ff': 'ful',
5174 'fi': 'fin',
5175 'fj': 'fij',
5176 'fo': 'fao',
5177 'fr': 'fra',
5178 'fy': 'fry',
5179 'ga': 'gle',
5180 'gd': 'gla',
5181 'gl': 'glg',
5182 'gn': 'grn',
5183 'gu': 'guj',
5184 'gv': 'glv',
5185 'ha': 'hau',
5186 'he': 'heb',
5187 'iw': 'heb', # Replaced by he in 1989 revision
5188 'hi': 'hin',
5189 'ho': 'hmo',
5190 'hr': 'hrv',
5191 'ht': 'hat',
5192 'hu': 'hun',
5193 'hy': 'hye',
5194 'hz': 'her',
5195 'ia': 'ina',
5196 'id': 'ind',
5197 'in': 'ind', # Replaced by id in 1989 revision
5198 'ie': 'ile',
5199 'ig': 'ibo',
5200 'ii': 'iii',
5201 'ik': 'ipk',
5202 'io': 'ido',
5203 'is': 'isl',
5204 'it': 'ita',
5205 'iu': 'iku',
5206 'ja': 'jpn',
5207 'jv': 'jav',
5208 'ka': 'kat',
5209 'kg': 'kon',
5210 'ki': 'kik',
5211 'kj': 'kua',
5212 'kk': 'kaz',
5213 'kl': 'kal',
5214 'km': 'khm',
5215 'kn': 'kan',
5216 'ko': 'kor',
5217 'kr': 'kau',
5218 'ks': 'kas',
5219 'ku': 'kur',
5220 'kv': 'kom',
5221 'kw': 'cor',
5222 'ky': 'kir',
5223 'la': 'lat',
5224 'lb': 'ltz',
5225 'lg': 'lug',
5226 'li': 'lim',
5227 'ln': 'lin',
5228 'lo': 'lao',
5229 'lt': 'lit',
5230 'lu': 'lub',
5231 'lv': 'lav',
5232 'mg': 'mlg',
5233 'mh': 'mah',
5234 'mi': 'mri',
5235 'mk': 'mkd',
5236 'ml': 'mal',
5237 'mn': 'mon',
5238 'mr': 'mar',
5239 'ms': 'msa',
5240 'mt': 'mlt',
5241 'my': 'mya',
5242 'na': 'nau',
5243 'nb': 'nob',
5244 'nd': 'nde',
5245 'ne': 'nep',
5246 'ng': 'ndo',
5247 'nl': 'nld',
5248 'nn': 'nno',
5249 'no': 'nor',
5250 'nr': 'nbl',
5251 'nv': 'nav',
5252 'ny': 'nya',
5253 'oc': 'oci',
5254 'oj': 'oji',
5255 'om': 'orm',
5256 'or': 'ori',
5257 'os': 'oss',
5258 'pa': 'pan',
5259 'pi': 'pli',
5260 'pl': 'pol',
5261 'ps': 'pus',
5262 'pt': 'por',
5263 'qu': 'que',
5264 'rm': 'roh',
5265 'rn': 'run',
5266 'ro': 'ron',
5267 'ru': 'rus',
5268 'rw': 'kin',
5269 'sa': 'san',
5270 'sc': 'srd',
5271 'sd': 'snd',
5272 'se': 'sme',
5273 'sg': 'sag',
5274 'si': 'sin',
5275 'sk': 'slk',
5276 'sl': 'slv',
5277 'sm': 'smo',
5278 'sn': 'sna',
5279 'so': 'som',
5280 'sq': 'sqi',
5281 'sr': 'srp',
5282 'ss': 'ssw',
5283 'st': 'sot',
5284 'su': 'sun',
5285 'sv': 'swe',
5286 'sw': 'swa',
5287 'ta': 'tam',
5288 'te': 'tel',
5289 'tg': 'tgk',
5290 'th': 'tha',
5291 'ti': 'tir',
5292 'tk': 'tuk',
5293 'tl': 'tgl',
5294 'tn': 'tsn',
5295 'to': 'ton',
5296 'tr': 'tur',
5297 'ts': 'tso',
5298 'tt': 'tat',
5299 'tw': 'twi',
5300 'ty': 'tah',
5301 'ug': 'uig',
5302 'uk': 'ukr',
5303 'ur': 'urd',
5304 'uz': 'uzb',
5305 've': 'ven',
5306 'vi': 'vie',
5307 'vo': 'vol',
5308 'wa': 'wln',
5309 'wo': 'wol',
5310 'xh': 'xho',
5311 'yi': 'yid',
5312 'ji': 'yid', # Replaced by yi in 1989 revision
5313 'yo': 'yor',
5314 'za': 'zha',
5315 'zh': 'zho',
5316 'zu': 'zul',
5317 }
5318
5319 @classmethod
5320 def short2long(cls, code):
5321 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5322 return cls._lang_map.get(code[:2])
5323
5324 @classmethod
5325 def long2short(cls, code):
5326 """Convert language code from ISO 639-2/T to ISO 639-1"""
5327 for short_name, long_name in cls._lang_map.items():
5328 if long_name == code:
5329 return short_name
5330
5331
5332 class ISO3166Utils(object):
5333 # From http://data.okfn.org/data/core/country-list
5334 _country_map = {
5335 'AF': 'Afghanistan',
5336 'AX': 'Åland Islands',
5337 'AL': 'Albania',
5338 'DZ': 'Algeria',
5339 'AS': 'American Samoa',
5340 'AD': 'Andorra',
5341 'AO': 'Angola',
5342 'AI': 'Anguilla',
5343 'AQ': 'Antarctica',
5344 'AG': 'Antigua and Barbuda',
5345 'AR': 'Argentina',
5346 'AM': 'Armenia',
5347 'AW': 'Aruba',
5348 'AU': 'Australia',
5349 'AT': 'Austria',
5350 'AZ': 'Azerbaijan',
5351 'BS': 'Bahamas',
5352 'BH': 'Bahrain',
5353 'BD': 'Bangladesh',
5354 'BB': 'Barbados',
5355 'BY': 'Belarus',
5356 'BE': 'Belgium',
5357 'BZ': 'Belize',
5358 'BJ': 'Benin',
5359 'BM': 'Bermuda',
5360 'BT': 'Bhutan',
5361 'BO': 'Bolivia, Plurinational State of',
5362 'BQ': 'Bonaire, Sint Eustatius and Saba',
5363 'BA': 'Bosnia and Herzegovina',
5364 'BW': 'Botswana',
5365 'BV': 'Bouvet Island',
5366 'BR': 'Brazil',
5367 'IO': 'British Indian Ocean Territory',
5368 'BN': 'Brunei Darussalam',
5369 'BG': 'Bulgaria',
5370 'BF': 'Burkina Faso',
5371 'BI': 'Burundi',
5372 'KH': 'Cambodia',
5373 'CM': 'Cameroon',
5374 'CA': 'Canada',
5375 'CV': 'Cape Verde',
5376 'KY': 'Cayman Islands',
5377 'CF': 'Central African Republic',
5378 'TD': 'Chad',
5379 'CL': 'Chile',
5380 'CN': 'China',
5381 'CX': 'Christmas Island',
5382 'CC': 'Cocos (Keeling) Islands',
5383 'CO': 'Colombia',
5384 'KM': 'Comoros',
5385 'CG': 'Congo',
5386 'CD': 'Congo, the Democratic Republic of the',
5387 'CK': 'Cook Islands',
5388 'CR': 'Costa Rica',
5389 'CI': 'Côte d\'Ivoire',
5390 'HR': 'Croatia',
5391 'CU': 'Cuba',
5392 'CW': 'Curaçao',
5393 'CY': 'Cyprus',
5394 'CZ': 'Czech Republic',
5395 'DK': 'Denmark',
5396 'DJ': 'Djibouti',
5397 'DM': 'Dominica',
5398 'DO': 'Dominican Republic',
5399 'EC': 'Ecuador',
5400 'EG': 'Egypt',
5401 'SV': 'El Salvador',
5402 'GQ': 'Equatorial Guinea',
5403 'ER': 'Eritrea',
5404 'EE': 'Estonia',
5405 'ET': 'Ethiopia',
5406 'FK': 'Falkland Islands (Malvinas)',
5407 'FO': 'Faroe Islands',
5408 'FJ': 'Fiji',
5409 'FI': 'Finland',
5410 'FR': 'France',
5411 'GF': 'French Guiana',
5412 'PF': 'French Polynesia',
5413 'TF': 'French Southern Territories',
5414 'GA': 'Gabon',
5415 'GM': 'Gambia',
5416 'GE': 'Georgia',
5417 'DE': 'Germany',
5418 'GH': 'Ghana',
5419 'GI': 'Gibraltar',
5420 'GR': 'Greece',
5421 'GL': 'Greenland',
5422 'GD': 'Grenada',
5423 'GP': 'Guadeloupe',
5424 'GU': 'Guam',
5425 'GT': 'Guatemala',
5426 'GG': 'Guernsey',
5427 'GN': 'Guinea',
5428 'GW': 'Guinea-Bissau',
5429 'GY': 'Guyana',
5430 'HT': 'Haiti',
5431 'HM': 'Heard Island and McDonald Islands',
5432 'VA': 'Holy See (Vatican City State)',
5433 'HN': 'Honduras',
5434 'HK': 'Hong Kong',
5435 'HU': 'Hungary',
5436 'IS': 'Iceland',
5437 'IN': 'India',
5438 'ID': 'Indonesia',
5439 'IR': 'Iran, Islamic Republic of',
5440 'IQ': 'Iraq',
5441 'IE': 'Ireland',
5442 'IM': 'Isle of Man',
5443 'IL': 'Israel',
5444 'IT': 'Italy',
5445 'JM': 'Jamaica',
5446 'JP': 'Japan',
5447 'JE': 'Jersey',
5448 'JO': 'Jordan',
5449 'KZ': 'Kazakhstan',
5450 'KE': 'Kenya',
5451 'KI': 'Kiribati',
5452 'KP': 'Korea, Democratic People\'s Republic of',
5453 'KR': 'Korea, Republic of',
5454 'KW': 'Kuwait',
5455 'KG': 'Kyrgyzstan',
5456 'LA': 'Lao People\'s Democratic Republic',
5457 'LV': 'Latvia',
5458 'LB': 'Lebanon',
5459 'LS': 'Lesotho',
5460 'LR': 'Liberia',
5461 'LY': 'Libya',
5462 'LI': 'Liechtenstein',
5463 'LT': 'Lithuania',
5464 'LU': 'Luxembourg',
5465 'MO': 'Macao',
5466 'MK': 'Macedonia, the Former Yugoslav Republic of',
5467 'MG': 'Madagascar',
5468 'MW': 'Malawi',
5469 'MY': 'Malaysia',
5470 'MV': 'Maldives',
5471 'ML': 'Mali',
5472 'MT': 'Malta',
5473 'MH': 'Marshall Islands',
5474 'MQ': 'Martinique',
5475 'MR': 'Mauritania',
5476 'MU': 'Mauritius',
5477 'YT': 'Mayotte',
5478 'MX': 'Mexico',
5479 'FM': 'Micronesia, Federated States of',
5480 'MD': 'Moldova, Republic of',
5481 'MC': 'Monaco',
5482 'MN': 'Mongolia',
5483 'ME': 'Montenegro',
5484 'MS': 'Montserrat',
5485 'MA': 'Morocco',
5486 'MZ': 'Mozambique',
5487 'MM': 'Myanmar',
5488 'NA': 'Namibia',
5489 'NR': 'Nauru',
5490 'NP': 'Nepal',
5491 'NL': 'Netherlands',
5492 'NC': 'New Caledonia',
5493 'NZ': 'New Zealand',
5494 'NI': 'Nicaragua',
5495 'NE': 'Niger',
5496 'NG': 'Nigeria',
5497 'NU': 'Niue',
5498 'NF': 'Norfolk Island',
5499 'MP': 'Northern Mariana Islands',
5500 'NO': 'Norway',
5501 'OM': 'Oman',
5502 'PK': 'Pakistan',
5503 'PW': 'Palau',
5504 'PS': 'Palestine, State of',
5505 'PA': 'Panama',
5506 'PG': 'Papua New Guinea',
5507 'PY': 'Paraguay',
5508 'PE': 'Peru',
5509 'PH': 'Philippines',
5510 'PN': 'Pitcairn',
5511 'PL': 'Poland',
5512 'PT': 'Portugal',
5513 'PR': 'Puerto Rico',
5514 'QA': 'Qatar',
5515 'RE': 'Réunion',
5516 'RO': 'Romania',
5517 'RU': 'Russian Federation',
5518 'RW': 'Rwanda',
5519 'BL': 'Saint Barthélemy',
5520 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5521 'KN': 'Saint Kitts and Nevis',
5522 'LC': 'Saint Lucia',
5523 'MF': 'Saint Martin (French part)',
5524 'PM': 'Saint Pierre and Miquelon',
5525 'VC': 'Saint Vincent and the Grenadines',
5526 'WS': 'Samoa',
5527 'SM': 'San Marino',
5528 'ST': 'Sao Tome and Principe',
5529 'SA': 'Saudi Arabia',
5530 'SN': 'Senegal',
5531 'RS': 'Serbia',
5532 'SC': 'Seychelles',
5533 'SL': 'Sierra Leone',
5534 'SG': 'Singapore',
5535 'SX': 'Sint Maarten (Dutch part)',
5536 'SK': 'Slovakia',
5537 'SI': 'Slovenia',
5538 'SB': 'Solomon Islands',
5539 'SO': 'Somalia',
5540 'ZA': 'South Africa',
5541 'GS': 'South Georgia and the South Sandwich Islands',
5542 'SS': 'South Sudan',
5543 'ES': 'Spain',
5544 'LK': 'Sri Lanka',
5545 'SD': 'Sudan',
5546 'SR': 'Suriname',
5547 'SJ': 'Svalbard and Jan Mayen',
5548 'SZ': 'Swaziland',
5549 'SE': 'Sweden',
5550 'CH': 'Switzerland',
5551 'SY': 'Syrian Arab Republic',
5552 'TW': 'Taiwan, Province of China',
5553 'TJ': 'Tajikistan',
5554 'TZ': 'Tanzania, United Republic of',
5555 'TH': 'Thailand',
5556 'TL': 'Timor-Leste',
5557 'TG': 'Togo',
5558 'TK': 'Tokelau',
5559 'TO': 'Tonga',
5560 'TT': 'Trinidad and Tobago',
5561 'TN': 'Tunisia',
5562 'TR': 'Turkey',
5563 'TM': 'Turkmenistan',
5564 'TC': 'Turks and Caicos Islands',
5565 'TV': 'Tuvalu',
5566 'UG': 'Uganda',
5567 'UA': 'Ukraine',
5568 'AE': 'United Arab Emirates',
5569 'GB': 'United Kingdom',
5570 'US': 'United States',
5571 'UM': 'United States Minor Outlying Islands',
5572 'UY': 'Uruguay',
5573 'UZ': 'Uzbekistan',
5574 'VU': 'Vanuatu',
5575 'VE': 'Venezuela, Bolivarian Republic of',
5576 'VN': 'Viet Nam',
5577 'VG': 'Virgin Islands, British',
5578 'VI': 'Virgin Islands, U.S.',
5579 'WF': 'Wallis and Futuna',
5580 'EH': 'Western Sahara',
5581 'YE': 'Yemen',
5582 'ZM': 'Zambia',
5583 'ZW': 'Zimbabwe',
5584 }
5585
5586 @classmethod
5587 def short2full(cls, code):
5588 """Convert an ISO 3166-2 country code to the corresponding full name"""
5589 return cls._country_map.get(code.upper())
5590
5591
5592 class GeoUtils(object):
5593 # Major IPv4 address blocks per country
5594 _country_ip_map = {
5595 'AD': '46.172.224.0/19',
5596 'AE': '94.200.0.0/13',
5597 'AF': '149.54.0.0/17',
5598 'AG': '209.59.64.0/18',
5599 'AI': '204.14.248.0/21',
5600 'AL': '46.99.0.0/16',
5601 'AM': '46.70.0.0/15',
5602 'AO': '105.168.0.0/13',
5603 'AP': '182.50.184.0/21',
5604 'AQ': '23.154.160.0/24',
5605 'AR': '181.0.0.0/12',
5606 'AS': '202.70.112.0/20',
5607 'AT': '77.116.0.0/14',
5608 'AU': '1.128.0.0/11',
5609 'AW': '181.41.0.0/18',
5610 'AX': '185.217.4.0/22',
5611 'AZ': '5.197.0.0/16',
5612 'BA': '31.176.128.0/17',
5613 'BB': '65.48.128.0/17',
5614 'BD': '114.130.0.0/16',
5615 'BE': '57.0.0.0/8',
5616 'BF': '102.178.0.0/15',
5617 'BG': '95.42.0.0/15',
5618 'BH': '37.131.0.0/17',
5619 'BI': '154.117.192.0/18',
5620 'BJ': '137.255.0.0/16',
5621 'BL': '185.212.72.0/23',
5622 'BM': '196.12.64.0/18',
5623 'BN': '156.31.0.0/16',
5624 'BO': '161.56.0.0/16',
5625 'BQ': '161.0.80.0/20',
5626 'BR': '191.128.0.0/12',
5627 'BS': '24.51.64.0/18',
5628 'BT': '119.2.96.0/19',
5629 'BW': '168.167.0.0/16',
5630 'BY': '178.120.0.0/13',
5631 'BZ': '179.42.192.0/18',
5632 'CA': '99.224.0.0/11',
5633 'CD': '41.243.0.0/16',
5634 'CF': '197.242.176.0/21',
5635 'CG': '160.113.0.0/16',
5636 'CH': '85.0.0.0/13',
5637 'CI': '102.136.0.0/14',
5638 'CK': '202.65.32.0/19',
5639 'CL': '152.172.0.0/14',
5640 'CM': '102.244.0.0/14',
5641 'CN': '36.128.0.0/10',
5642 'CO': '181.240.0.0/12',
5643 'CR': '201.192.0.0/12',
5644 'CU': '152.206.0.0/15',
5645 'CV': '165.90.96.0/19',
5646 'CW': '190.88.128.0/17',
5647 'CY': '31.153.0.0/16',
5648 'CZ': '88.100.0.0/14',
5649 'DE': '53.0.0.0/8',
5650 'DJ': '197.241.0.0/17',
5651 'DK': '87.48.0.0/12',
5652 'DM': '192.243.48.0/20',
5653 'DO': '152.166.0.0/15',
5654 'DZ': '41.96.0.0/12',
5655 'EC': '186.68.0.0/15',
5656 'EE': '90.190.0.0/15',
5657 'EG': '156.160.0.0/11',
5658 'ER': '196.200.96.0/20',
5659 'ES': '88.0.0.0/11',
5660 'ET': '196.188.0.0/14',
5661 'EU': '2.16.0.0/13',
5662 'FI': '91.152.0.0/13',
5663 'FJ': '144.120.0.0/16',
5664 'FK': '80.73.208.0/21',
5665 'FM': '119.252.112.0/20',
5666 'FO': '88.85.32.0/19',
5667 'FR': '90.0.0.0/9',
5668 'GA': '41.158.0.0/15',
5669 'GB': '25.0.0.0/8',
5670 'GD': '74.122.88.0/21',
5671 'GE': '31.146.0.0/16',
5672 'GF': '161.22.64.0/18',
5673 'GG': '62.68.160.0/19',
5674 'GH': '154.160.0.0/12',
5675 'GI': '95.164.0.0/16',
5676 'GL': '88.83.0.0/19',
5677 'GM': '160.182.0.0/15',
5678 'GN': '197.149.192.0/18',
5679 'GP': '104.250.0.0/19',
5680 'GQ': '105.235.224.0/20',
5681 'GR': '94.64.0.0/13',
5682 'GT': '168.234.0.0/16',
5683 'GU': '168.123.0.0/16',
5684 'GW': '197.214.80.0/20',
5685 'GY': '181.41.64.0/18',
5686 'HK': '113.252.0.0/14',
5687 'HN': '181.210.0.0/16',
5688 'HR': '93.136.0.0/13',
5689 'HT': '148.102.128.0/17',
5690 'HU': '84.0.0.0/14',
5691 'ID': '39.192.0.0/10',
5692 'IE': '87.32.0.0/12',
5693 'IL': '79.176.0.0/13',
5694 'IM': '5.62.80.0/20',
5695 'IN': '117.192.0.0/10',
5696 'IO': '203.83.48.0/21',
5697 'IQ': '37.236.0.0/14',
5698 'IR': '2.176.0.0/12',
5699 'IS': '82.221.0.0/16',
5700 'IT': '79.0.0.0/10',
5701 'JE': '87.244.64.0/18',
5702 'JM': '72.27.0.0/17',
5703 'JO': '176.29.0.0/16',
5704 'JP': '133.0.0.0/8',
5705 'KE': '105.48.0.0/12',
5706 'KG': '158.181.128.0/17',
5707 'KH': '36.37.128.0/17',
5708 'KI': '103.25.140.0/22',
5709 'KM': '197.255.224.0/20',
5710 'KN': '198.167.192.0/19',
5711 'KP': '175.45.176.0/22',
5712 'KR': '175.192.0.0/10',
5713 'KW': '37.36.0.0/14',
5714 'KY': '64.96.0.0/15',
5715 'KZ': '2.72.0.0/13',
5716 'LA': '115.84.64.0/18',
5717 'LB': '178.135.0.0/16',
5718 'LC': '24.92.144.0/20',
5719 'LI': '82.117.0.0/19',
5720 'LK': '112.134.0.0/15',
5721 'LR': '102.183.0.0/16',
5722 'LS': '129.232.0.0/17',
5723 'LT': '78.56.0.0/13',
5724 'LU': '188.42.0.0/16',
5725 'LV': '46.109.0.0/16',
5726 'LY': '41.252.0.0/14',
5727 'MA': '105.128.0.0/11',
5728 'MC': '88.209.64.0/18',
5729 'MD': '37.246.0.0/16',
5730 'ME': '178.175.0.0/17',
5731 'MF': '74.112.232.0/21',
5732 'MG': '154.126.0.0/17',
5733 'MH': '117.103.88.0/21',
5734 'MK': '77.28.0.0/15',
5735 'ML': '154.118.128.0/18',
5736 'MM': '37.111.0.0/17',
5737 'MN': '49.0.128.0/17',
5738 'MO': '60.246.0.0/16',
5739 'MP': '202.88.64.0/20',
5740 'MQ': '109.203.224.0/19',
5741 'MR': '41.188.64.0/18',
5742 'MS': '208.90.112.0/22',
5743 'MT': '46.11.0.0/16',
5744 'MU': '105.16.0.0/12',
5745 'MV': '27.114.128.0/18',
5746 'MW': '102.70.0.0/15',
5747 'MX': '187.192.0.0/11',
5748 'MY': '175.136.0.0/13',
5749 'MZ': '197.218.0.0/15',
5750 'NA': '41.182.0.0/16',
5751 'NC': '101.101.0.0/18',
5752 'NE': '197.214.0.0/18',
5753 'NF': '203.17.240.0/22',
5754 'NG': '105.112.0.0/12',
5755 'NI': '186.76.0.0/15',
5756 'NL': '145.96.0.0/11',
5757 'NO': '84.208.0.0/13',
5758 'NP': '36.252.0.0/15',
5759 'NR': '203.98.224.0/19',
5760 'NU': '49.156.48.0/22',
5761 'NZ': '49.224.0.0/14',
5762 'OM': '5.36.0.0/15',
5763 'PA': '186.72.0.0/15',
5764 'PE': '186.160.0.0/14',
5765 'PF': '123.50.64.0/18',
5766 'PG': '124.240.192.0/19',
5767 'PH': '49.144.0.0/13',
5768 'PK': '39.32.0.0/11',
5769 'PL': '83.0.0.0/11',
5770 'PM': '70.36.0.0/20',
5771 'PR': '66.50.0.0/16',
5772 'PS': '188.161.0.0/16',
5773 'PT': '85.240.0.0/13',
5774 'PW': '202.124.224.0/20',
5775 'PY': '181.120.0.0/14',
5776 'QA': '37.210.0.0/15',
5777 'RE': '102.35.0.0/16',
5778 'RO': '79.112.0.0/13',
5779 'RS': '93.86.0.0/15',
5780 'RU': '5.136.0.0/13',
5781 'RW': '41.186.0.0/16',
5782 'SA': '188.48.0.0/13',
5783 'SB': '202.1.160.0/19',
5784 'SC': '154.192.0.0/11',
5785 'SD': '102.120.0.0/13',
5786 'SE': '78.64.0.0/12',
5787 'SG': '8.128.0.0/10',
5788 'SI': '188.196.0.0/14',
5789 'SK': '78.98.0.0/15',
5790 'SL': '102.143.0.0/17',
5791 'SM': '89.186.32.0/19',
5792 'SN': '41.82.0.0/15',
5793 'SO': '154.115.192.0/18',
5794 'SR': '186.179.128.0/17',
5795 'SS': '105.235.208.0/21',
5796 'ST': '197.159.160.0/19',
5797 'SV': '168.243.0.0/16',
5798 'SX': '190.102.0.0/20',
5799 'SY': '5.0.0.0/16',
5800 'SZ': '41.84.224.0/19',
5801 'TC': '65.255.48.0/20',
5802 'TD': '154.68.128.0/19',
5803 'TG': '196.168.0.0/14',
5804 'TH': '171.96.0.0/13',
5805 'TJ': '85.9.128.0/18',
5806 'TK': '27.96.24.0/21',
5807 'TL': '180.189.160.0/20',
5808 'TM': '95.85.96.0/19',
5809 'TN': '197.0.0.0/11',
5810 'TO': '175.176.144.0/21',
5811 'TR': '78.160.0.0/11',
5812 'TT': '186.44.0.0/15',
5813 'TV': '202.2.96.0/19',
5814 'TW': '120.96.0.0/11',
5815 'TZ': '156.156.0.0/14',
5816 'UA': '37.52.0.0/14',
5817 'UG': '102.80.0.0/13',
5818 'US': '6.0.0.0/8',
5819 'UY': '167.56.0.0/13',
5820 'UZ': '84.54.64.0/18',
5821 'VA': '212.77.0.0/19',
5822 'VC': '207.191.240.0/21',
5823 'VE': '186.88.0.0/13',
5824 'VG': '66.81.192.0/20',
5825 'VI': '146.226.0.0/16',
5826 'VN': '14.160.0.0/11',
5827 'VU': '202.80.32.0/20',
5828 'WF': '117.20.32.0/21',
5829 'WS': '202.4.32.0/19',
5830 'YE': '134.35.0.0/16',
5831 'YT': '41.242.116.0/22',
5832 'ZA': '41.0.0.0/11',
5833 'ZM': '102.144.0.0/13',
5834 'ZW': '102.177.192.0/18',
5835 }
5836
5837 @classmethod
5838 def random_ipv4(cls, code_or_block):
5839 if len(code_or_block) == 2:
5840 block = cls._country_ip_map.get(code_or_block.upper())
5841 if not block:
5842 return None
5843 else:
5844 block = code_or_block
5845 addr, preflen = block.split('/')
5846 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5847 addr_max = addr_min | (0xffffffff >> int(preflen))
5848 return compat_str(socket.inet_ntoa(
5849 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5850
5851
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that allows overriding the proxy on a per-request basis
    via the internal 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    # Lambda defaults bind the loop's `type` and the bound
                    # method at definition time; '__noproxy__' is the
                    # sentinel for "no proxy configured for this scheme".
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy (if present) overrides the handler default
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do the actual wrapping of the socket with SOCKS
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5875
5876
5877 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5878 # released into Public Domain
5879 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5880
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:int, blocksize:int) : bytes
    Convert an integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Minimal big-endian encoding via int.to_bytes (replaces the old manual
    # 32-bit struct-pack loop). 0 -- and, matching the historical behavior,
    # any negative n -- encodes as a single zero byte.
    s = b'\x00' if n <= 0 else n.to_bytes((n.bit_length() + 7) // 8, 'big')
    # add back some pad bytes so len(s) is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = b'\x00' * (blocksize - len(s) % blocksize) + s
    return s
5909
5910
def bytes_to_long(s):
    """bytes_to_long(bytes) : int
    Convert a big-endian byte string to an integer.

    This is (essentially) the inverse of long_to_bytes(). An empty input
    yields 0, same as the historical struct-unpack implementation.
    """
    return int.from_bytes(s, 'big')
5926
5927
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Interpret the data as a little-endian integer. int.from_bytes replaces
    # the old int(binascii.hexlify(data[::-1]), 16) round-trip and, unlike
    # it, also handles empty input (-> 0) instead of raising ValueError.
    payload = int.from_bytes(data, 'little')
    encrypted = pow(payload, exponent, modulus)
    return '%x' % encrypted
5943
5944
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (PKCS #1 v1.5): EM = 0x00 || 0x02 || PS || 0x00 || M, where
    # the padding string PS must consist of *non-zero* pseudo-random octets.
    # randint(1, 255), not (0, 254): a zero octet inside PS would make the
    # padding terminate early on decryption.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5958
5959
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer *num* in base *n*.

    *table* supplies the digit characters; when omitted, digits are taken
    from 0-9a-zA-Z (so n may be at most 62).
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5976
5977
def decode_packed_codes(code):
    """Unpack JavaScript obfuscated with the common 'p.a.c.k.e.r' scheme."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map every base-N token back to its original symbol; an empty symbol
    # means the token stands for itself.
    symbol_table = {}
    for idx in range(count):
        token = encode_base_n(idx, base)
        symbol_table[token] = symbols[idx] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5994
5995
def caesar(s, alphabet, shift):
    """Caesar-shift every character of *s* found in *alphabet* by *shift*
    positions (wrapping around); characters outside *alphabet* pass through."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(map(rotate, s))
6003
6004
def rot47(s):
    """Apply the ROT47 cipher: rotate every printable ASCII character
    ('!' through '~') by 47 positions; everything else is unchanged."""
    # chr(33)..chr(126) is exactly the 94-character ROT47 alphabet
    alphabet = ''.join(map(chr, range(33, 127)))
    return ''.join(
        alphabet[(alphabet.index(ch) + 47) % 94] if ch in alphabet else ch
        for ch in s)
6007
6008
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="value",...') into a
    dict, stripping the surrounding double quotes from quoted values."""
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    }
6016
6017
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value, like JS `>>>`."""
    if val >= 0:
        return val >> n
    # Map the negative value onto its unsigned 32-bit representation first
    return (val + 0x100000000) >> n
6020
6021
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    # Reference: https://www.w3.org/TR/PNG/
    # Decodes a PNG byte string into (width, height, pixels), where `pixels`
    # is a list of rows, each row a flat list of byte values (3 per pixel).
    # NOTE(review): assumes a non-interlaced 8-bit RGB image -- the stride
    # and the filters below step 3 bytes per pixel; confirm before feeding
    # other color types/bit depths.
    header = png_data[8:]

    # Validate the fixed PNG signature and require IHDR as the first chunk
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Chunk fields are big-endian unsigned ints of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: length(4) + type(4) + data(length) + CRC(4)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (checked above); width/height are its first 8 bytes
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image data may be split across several IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 bytes per RGB pixel)
    pixels = []

    # Byte at linear index `idx` of the already-reconstructed image
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo the per-scanline filters (https://www.w3.org/TR/PNG/#9Filters);
    # each scanline is prefixed with one filter-type byte
    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # `left` is the matching byte of the previous pixel (3 bytes back);
            # `up` is the same byte of the scanline above
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the predictor nearest to p; ties prefer a, then b, then c
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6127
6128
def write_xattr(path, key, value):
    """Set extended attribute `key` to `value` (bytes) on the file at `path`.

    Tries, in order: the `pyxattr`/`xattr` Python modules, NTFS Alternate
    Data Streams on Windows, then the `setfattr`/`xattr` command-line tools.
    Raises XAttrMetadataError when writing fails, XAttrUnavailableError when
    no usable implementation can be found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Fall back to the setfattr/xattr CLI tools on other Unixes
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a (UTF-8) string argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6211
6212
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict mapping the given field names to string values."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    chosen = earliest + datetime.timedelta(days=random.randint(0, span_days))
    return {
        field: str(part)
        for field, part in zip(
            (year_field, month_field, day_field),
            (chosen.year, chosen.month, chosen.day))
    }
6223
6224
# Templates for internet shortcut files, which are plain text files.
# Windows/KDE '.url' shortcut (INI-like format)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS '.webloc' shortcut (XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org '.desktop' entry (Linux desktop shortcut)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()

# Shortcut type -> template; templates are filled via '%' formatting
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
6256
6257
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.

    # Only omit an explicit port when it matches the scheme's default.
    # (The previous code dropped ':80' for every scheme, including https,
    # which would silently change the meaning of 'https://host:80/...'.)
    default_port = {'http': 80, 'https': 443}.get(iri_parts.scheme)
    if iri_parts.port is not None and iri_parts.port != default_port:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6300
6301
def to_high_limit_path(path):
    """On Windows, return an absolute '\\\\?\\'-prefixed path to bypass the
    MAX_PATH limit; on every other platform return *path* unchanged."""
    if sys.platform in ['win32', 'cygwin']:
        # Extended-length path prefix. The maximum allowed length for the
        # individual path segments may still be quite limited.
        return '\\\\?\\' + os.path.abspath(path)

    return path
6308
6309
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format a value (or obj[field]) with *template*, returning *default*
    when the value is one of *ignore*. *func* optionally transforms the
    value before formatting; a transformed value in *ignore* also yields
    *default*."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if val not in ignore and func:
        val = func(val)
    if val in ignore:
        return default
    return template % val
6318
6319
def clean_podcast_url(url):
    """Strip known podcast analytics/measurement redirect prefixes from *url*."""
    tracking_prefix = re.compile(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''')
    return tracking_prefix.sub('', url)
6335
6336
_HEX_TABLE = '0123456789abcdef'  # lowercase hexadecimal digits
6338
6339
def random_uuidv4():
    """Return a random version-4 UUID as a lowercase hyphenated string.

    Uses the stdlib uuid module so the variant bits are RFC-4122-correct;
    the old template substitution randomized the variant nibble too.
    """
    import uuid  # local import: keeps the module's import surface unchanged
    return str(uuid.uuid4())
6342
6343
def make_dir(path, to_screen=None):
    """Create the parent directory of *path* (and any missing ancestors).

    Returns True on success (including when the directory already exists),
    False on failure. On failure the error is reported through *to_screen*
    when it is callable.
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            # exist_ok avoids the check-then-create race of the old
            # `if not os.path.exists(dn)` guard
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # Was `if callable(to_screen) is not None:`, which is always True
        # (callable() returns a bool) and crashed when to_screen was None
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6354
6355
def get_executable_path():
    """Absolute path of the directory yt-dlp is running from: the binary's
    directory for frozen (PyInstaller) builds, otherwise the project root
    relative to this module (two levels up when imported from a zip)."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6365
6366
def load_plugins(name, suffix, namespace):
    """Load plugin classes from ytdlp_plugins/<name>/__init__.py.

    Every attribute of the plugin module whose name ends with *suffix* is
    copied into *namespace* (unless the name is already taken) and is also
    returned in a dict. A missing plugin package is silently ignored.
    """
    classes = {}
    try:
        plugins_spec = importlib.util.spec_from_file_location(
            name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
        plugins = importlib.util.module_from_spec(plugins_spec)
        sys.modules[plugins_spec.name] = plugins
        plugins_spec.loader.exec_module(plugins)
        # note: deliberately NOT reusing `name` for the loop variable --
        # it is the plugin package name parameter above
        for attr_name in dir(plugins):
            if attr_name in namespace:
                continue  # never overwrite an existing (e.g. built-in) name
            if not attr_name.endswith(suffix):
                continue
            klass = getattr(plugins, attr_name)
            classes[attr_name] = namespace[attr_name] = klass
    except FileNotFoundError:
        pass
    return classes
6385
6386
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a function, a tuple of strings or "...".
                            When a function is given, it takes the key as argument and
                            returns whether the key matches or not. When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        # Lower-case the path keys up front; dict keys are lowered on lookup
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal, reset per path below) tracks how many branching
        # levels ("...", tuples, callables) were entered; the caller uses it
        # to flatten the correspondingly nested result lists.
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # A tuple of keys: traverse each alternative, then treat the
                # collected results like a "..." branch
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch into every value of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # Filter function: keep only items whose key/index matches
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Plain dict lookup; falls back to case-insensitive scan
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings into int indices / slices
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # ':' over the full sequence is equivalent to "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalize expected_type into a "keep or None" filter
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Branching occurred: flatten the nested lists (depth - 1
                # levels) and drop Nones, then apply the type filter
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6483
6484
def traverse_dict(dictn, keys, casesense=True):
    '''Deprecated wrapper around traverse_obj(), kept only for backward compatibility. Do not use'''
    return traverse_obj(
        dictn, keys, casesense=casesense,
        is_user_input=True, traverse_string=True)
6489
6490
def variadic(x, allowed_types=(str, bytes)):
    """Return *x* itself when it is an iterable (excluding the atomic
    *allowed_types*); otherwise wrap it into a one-element tuple."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6493
6494
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers={}):
    """Create a JWS Compact Serialization token signed with HMAC-SHA256.

    NOTE(review): this uses standard (padded, non-url-safe) base64 rather
    than the unpadded base64url that RFC 7515 mandates -- kept as-is since
    callers may rely on the current output.
    """
    jose_header = {
        'alg': 'HS256',
        'typ': 'JWT',
        **headers,
    }
    segments = [
        base64.b64encode(json.dumps(jose_header).encode('utf-8')),
        base64.b64encode(json.dumps(payload_data).encode('utf-8')),
    ]
    signing_input = b'.'.join(segments)
    signature = hmac.new(key.encode('utf-8'), signing_input, hashlib.sha256).digest()
    segments.append(base64.b64encode(signature))
    return b'.'.join(segments)
6512
6513
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Decode the payload of a JWS Compact Serialization token.

    Does NOT verify the signature. Accepts both padded and (standard,
    per RFC 7515) unpadded base64url segments.
    """
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    # JWT strips the '=' padding; urlsafe_b64decode raises binascii.Error
    # on such input, so restore the padding first.
    payload_data = json.loads(base64.urlsafe_b64decode(
        payload_b64 + '=' * (-len(payload_b64) % 4)))
    return payload_data
6519
6520
def supports_terminal_sequences(stream):
    """Return True if ANSI/VT escape sequences can safely be written to *stream*."""
    if compat_os_name == 'nt':
        # Windows consoles understand VT sequences only from build 10586 (TH2) on
        if get_windows_version() < (10, 0, 10586):
            return False
    elif not os.getenv('TERM'):
        # No TERM set -> likely not running inside a capable terminal
        return False
    try:
        return stream.isatty()
    except BaseException:
        # stream may lack isatty(), be closed, or raise anything else
        return False
6531
6532
# Matches ANSI SGR ("Select Graphic Rendition") sequences, e.g. '\033[0;32m'
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return *string* with all terminal color/formatting escape sequences stripped."""
    return _terminal_sequences_re.sub('', string)
6538
6539
def number_of_digits(number):
    """Number of characters in the base-10 integer representation of
    *number*, including a leading '-' for negative values."""
    return len(str(int(number)))