4 from __future__
import unicode_literals
37 import xml
.etree
.ElementTree
41 compat_HTMLParseError
,
47 compat_ctypes_WINFUNCTYPE
,
48 compat_etree_fromstring
,
51 compat_html_entities_html5
,
64 compat_urllib_parse_urlencode
,
65 compat_urllib_parse_urlparse
,
66 compat_urllib_parse_urlunparse
,
67 compat_urllib_parse_quote
,
68 compat_urllib_parse_quote_plus
,
69 compat_urllib_parse_unquote_plus
,
70 compat_urllib_request
,
def register_socks_protocols():
    """Teach urlsplit() that SOCKS schemes carry a netloc.

    Workaround for https://bugs.python.org/issue7904 (Python < 2.6.5):
    URLs whose scheme is missing from urlparse.uses_netloc are parsed
    incorrectly, so the SOCKS schemes are appended on demand.
    """
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme in compat_urlparse.uses_netloc:
            continue
        compat_urlparse.uses_netloc.append(scheme)
# Type object of a compiled regular expression pattern, used for isinstance()
# checks elsewhere; the stdlib does not clearly define/export this otherwise.
compiled_regex_type = type(re.compile(''))
94 def random_user_agent():
95 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1674 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1678 'User-Agent': random_user_agent(),
1679 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1680 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1681 'Accept-Encoding': 'gzip, deflate',
1682 'Accept-Language': 'en-us,en;q=0.5',
1687 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1691 NO_DEFAULT
= object()
1693 ENGLISH_MONTH_NAMES
= [
1694 'January', 'February', 'March', 'April', 'May', 'June',
1695 'July', 'August', 'September', 'October', 'November', 'December']
1698 'en': ENGLISH_MONTH_NAMES
,
1700 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1701 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1704 KNOWN_EXTENSIONS
= (
1705 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1706 'flv', 'f4v', 'f4a', 'f4b',
1707 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1708 'mkv', 'mka', 'mk3d',
1711 'asf', 'wmv', 'wma',
1717 'f4f', 'f4m', 'm3u8', 'smil')
1719 # needed for sanitizing filenames in restricted mode
1720 ACCENT_CHARS
= dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1721 itertools
.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1722 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1746 '%Y/%m/%d %H:%M:%S',
1750 '%Y-%m-%d %H:%M:%S',
1751 '%Y-%m-%d %H:%M:%S.%f',
1752 '%Y-%m-%d %H:%M:%S:%f',
1755 '%Y-%m-%dT%H:%M:%SZ',
1756 '%Y-%m-%dT%H:%M:%S.%fZ',
1757 '%Y-%m-%dT%H:%M:%S.%f0Z',
1758 '%Y-%m-%dT%H:%M:%S',
1759 '%Y-%m-%dT%H:%M:%S.%f',
1761 '%b %d %Y at %H:%M',
1762 '%b %d %Y at %H:%M:%S',
1763 '%B %d %Y at %H:%M',
1764 '%B %d %Y at %H:%M:%S',
1768 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1769 DATE_FORMATS_DAY_FIRST
.extend([
1775 '%d/%m/%Y %H:%M:%S',
1778 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1779 DATE_FORMATS_MONTH_FIRST
.extend([
1784 '%m/%d/%Y %H:%M:%S',
# Matches the argument list of a P.A.C.K.E.R. eval-packed JavaScript payload:
# }('payload', radix, count, 'word|list'.split('|')
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the body of a <script type="application/ld+json"> block; the
# quote style of the type attribute is matched via the \1 backreference.
# (Restored: the pattern text was garbled in this copy of the file.)
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    pref = locale.getpreferredencoding()
    # NOTE(review): the remainder of this function (validation of `pref` and
    # the return statement) is elided from this chunk — confirm upstream.
1806 def write_json_file(obj, fn):
1807 """ Encode obj as JSON and write it to fn, atomically if possible """
1809 fn = encodeFilename(fn)
1810 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1811 encoding = get_filesystem_encoding()
1812 # os.path.basename returns a bytes object, but NamedTemporaryFile
1813 # will fail if the filename contains non ascii characters unless we
1814 # use a unicode object
1815 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1816 # the same for os.path.dirname
1817 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1819 path_basename = os.path.basename
1820 path_dirname = os.path.dirname
1824 'prefix
': path_basename(fn) + '.',
1825 'dir': path_dirname(fn),
1829 # In Python 2.x, json.dump expects a bytestream.
1830 # In Python 3.x, it writes to a character stream
1831 if sys.version_info < (3, 0):
1836 'encoding
': 'utf
-8',
1839 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1844 if sys.platform == 'win32
':
1845 # Need to remove existing file on Windows, else os.rename raises
1846 # WindowsError or FileExistsError.
1854 os.chmod(tf.name, 0o666 & ~mask)
1857 os.rename(tf.name, fn)
1866 if sys.version_info >= (2, 7):
1867 def find_xpath_attr(node, xpath, key, val=None):
1868 """ Find the xpath xpath[@key=val] """
1869 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1870 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1871 return node.find(expr)
1873 def find_xpath_attr(node, xpath, key, val=None):
1874 for f in node.findall(compat_xpath(xpath)):
1875 if key not in f.attrib:
1877 if val is None or f.attrib.get(key) == val:
1881 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1882 # the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of *path* into '{uri}tag' form using *ns_map*."""
    expanded = []
    for step in path.split('/'):
        pieces = step.split(':')
        if len(pieces) == 1:
            # No namespace prefix on this step; keep it as-is.
            expanded.append(pieces[0])
        else:
            prefix, tag = pieces
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching *xpath* (a string, or an iterable of
    candidate xpaths tried in order).

    Returns *default* when given and nothing matches; raises ExtractorError
    when fatal=True and no default was supplied; otherwise returns None.
    """
    def _find(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        n = _find(xpath)
    else:
        # Try each candidate xpath, keeping the first hit.
        n = next((m for m in map(_find, xpath) if m is not None), None)

    if n is None:
        if default is not NO_DEFAULT:
            return default
        if fatal:
            raise ExtractorError(
                'Could not find XML element %s' % (xpath if name is None else name))
        return None
    return n
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        # Element lookup already resolved to the default / no match.
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        if fatal:
            raise ExtractorError(
                'Could not find XML element\'s text %s'
                % (xpath if name is None else name))
        return None
    return n.text
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute *key* of the element matching xpath[@key].

    Falls back to *default* when given; raises ExtractorError when fatal=True
    and there is no default; otherwise returns None on a miss.
    """
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        if fatal:
            raise ExtractorError(
                'Could not find XML attribute %s'
                % ('%s[@%s]' % (xpath, key) if name is None else name))
        return None
    return n.attrib[key]
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is just an attribute lookup with exact matching.
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying attribute=value, or None."""
    return next(
        iter(get_elements_by_attribute(attribute, value, html, escape_value)),
        None)
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # A class attribute is a space-separated token list, so match the class
    # name as a whole word anywhere inside the (already-regex) attribute value.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1971 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1972 """Return the content of the tag with the specified attribute in the passed HTML document"""
1974 value = re.escape(value) if escape_value else value
1977 for m in re.finditer(r'''(?xs)
1979 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1981 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1985 ''' % (re.escape(attribute), value), html):
1986 res = m.group('content
')
1988 if res.startswith('"') or res.startswith("'"):
1991 retlist.append(unescapeHTML(res))
1996 class HTMLAttributeParser(compat_HTMLParser):
1997 """Trivial HTML parser to gather the attributes for a single element"""
2001 compat_HTMLParser.__init__(self)
2003 def handle_starttag(self, tag, attrs):
2004 self.attrs = dict(attrs)
2007 def extract_attributes(html_element):
2008 """Given a string for an HTML element such as
2010 a="foo" B="bar" c="&98;az" d=boz
2011 empty= noval entity="&"
2014 Decode and return a dictionary of attributes.
2016 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
2017 'empty
': '', 'noval
': None, 'entity
': '&',
2018 'sq
': '"', 'dq': '\''
2020 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2021 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2023 parser = HTMLAttributeParser()
2025 parser.feed(html_element)
2027 # Older Python may throw HTMLParseError in case of malformed HTML
2028 except compat_HTMLParseError:
2033 def clean_html(html):
2034 """Clean an HTML snippet into a readable string"""
2036 if html is None: # Convenience for sanitizing descriptions etc.
2040 html = html.replace('\n', ' ')
2041 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2042 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2044 html = re.sub('<.*?>', '', html)
2045 # Replace html entities
2046 html = unescapeHTML(html)
2050 def sanitize_open(filename, open_mode):
2051 """Try to open the given filename, and slightly tweak it if this fails.
2053 Attempts to open the given filename. If this fails, it tries to change
2054 the filename slightly, step by step, until it's either able to open it
2055 or it fails and raises a final exception, like the standard open()
2058 It returns the tuple (stream, definitive_file_name).
2062 if sys.platform == 'win32':
2064 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2065 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2066 stream = open(encodeFilename(filename), open_mode)
2067 return (stream, filename)
2068 except (IOError, OSError) as err:
2069 if err.errno in (errno.EACCES,):
2072 # In case of error, try to remove win32 forbidden chars
2073 alt_filename = sanitize_path(filename)
2074 if alt_filename == filename:
2077 # An exception here should be caught in the caller
2078 stream = open(encodeFilename(alt_filename), open_mode)
2079 return (stream, alt_filename)
2082 def timeconvert(timestr):
2083 """Convert RFC 2822 defined time string into system timestamp"""
2085 timetuple = email.utils.parsedate_tz(timestr)
2086 if timetuple is not None:
2087 timestamp = email.utils.mktime_tz(timetuple)
2091 def sanitize_filename(s, restricted=False, is_id=False):
2092 """Sanitizes a string so it could be used as part of a filename.
2093 If restricted is set, use a stricter subset of allowed characters.
2094 Set is_id if this is not an arbitrary string, but an ID that should be kept
2097 def replace_insane(char):
2098 if restricted and char in ACCENT_CHARS:
2099 return ACCENT_CHARS[char]
2100 if char == '?' or ord(char) < 32 or ord(char) == 127:
2103 return '' if restricted else '\''
2105 return '_
-' if restricted else ' -'
2106 elif char in '\\/|
*<>':
2108 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2110 if restricted
and ord(char
) > 127:
2117 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2118 result
= ''.join(map(replace_insane
, s
))
2120 while '__' in result
:
2121 result
= result
.replace('__', '_')
2122 result
= result
.strip('_')
2123 # Common case of "Foreign band name - English song title"
2124 if restricted
and result
.startswith('-_'):
2126 if result
.startswith('-'):
2127 result
= '_' + result
[len('-'):]
2128 result
= result
.lstrip('.')
2134 def sanitize_path(s
, force
=False):
2135 """Sanitizes and normalizes path on Windows"""
2136 if sys
.platform
== 'win32':
2138 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2139 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2140 drive_or_unc
, _
= os
.path
.splitunc(s
)
2146 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2150 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2151 for path_part
in norm_path
]
2153 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2154 elif force
and s
[0] == os
.path
.sep
:
2155 sanitized_path
.insert(0, os
.path
.sep
)
2156 return os
.path
.join(*sanitized_path
)
def sanitize_url(url):
    """Return *url* with a scheme prepended if missing and common typos fixed."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
def extract_basic_auth(url):
    """Split inline credentials out of *url*.

    Returns (url_without_credentials, authorization_header_value) where the
    header value is None when the URL carried no username.
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    # Rebuild the netloc without the user:pass@ prefix.
    netloc = (parts.hostname if parts.port is None
              else '%s:%d' % (parts.hostname, parts.port))
    stripped_url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8'))
    return stripped_url, 'Basic ' + token.decode('utf-8')
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from *url* after sanitizing/escaping it and
    moving any inline basic-auth credentials into an Authorization header."""
    clean_url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # The Request headers may arrive positionally (argument #2) or as a kwarg.
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2198 """Expand shell variables and ~"""
2199 return os
.path
.expandvars(compat_expanduser(s
))
2202 def orderedSet(iterable
):
2203 """ Remove all duplicates from the input iterable """
2211 def _htmlentity_transform(entity_with_semicolon
):
2212 """Transforms an HTML entity to a character."""
2213 entity
= entity_with_semicolon
[:-1]
2215 # Known non-numeric HTML entity
2216 if entity
in compat_html_entities
.name2codepoint
:
2217 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2219 # TODO: HTML5 allows entities without a semicolon. For example,
2220 # 'Éric' should be decoded as 'Éric'.
2221 if entity_with_semicolon
in compat_html_entities_html5
:
2222 return compat_html_entities_html5
[entity_with_semicolon
]
2224 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2225 if mobj
is not None:
2226 numstr
= mobj
.group(1)
2227 if numstr
.startswith('x'):
2229 numstr
= '0%s' % numstr
2232 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2234 return compat_chr(int(numstr
, base
))
2238 # Unknown entity in name, return its literal representation
2239 return '&%s;' % entity
def unescapeHTML(s):
    """Replace HTML entities (named, decimal, hex) in *s* with their characters."""
    if s is None:
        return None
    assert type(s) == compat_str

    # Each '&...;' run is handed to _htmlentity_transform for decoding.
    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
def escapeHTML(text):
    """Escape the five HTML-special characters in *text* for safe embedding.

    '&' is replaced first so that the entities produced by the later
    replacements are not themselves re-escaped.

    Bug fix: in this copy of the file the replacement strings had been
    HTML-unescaped into identity no-ops (e.g. .replace('&', '&')); the
    proper entity strings are restored here.
    """
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
def process_communicate_or_kill(p, *args, **kwargs):
    """Run p.communicate(); if anything interrupts it, kill the process first.

    Catching BaseException (including KeyboardInterrupt) ensures the child
    does not outlive an aborted communicate() call.
    """
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2271 def get_subprocess_encoding():
2272 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2273 # For subprocess calls, encode with locale encoding
2274 # Refer to http://stackoverflow.com/a/9951851/35070
2275 encoding
= preferredencoding()
2277 encoding
= sys
.getfilesystemencoding()
2278 if encoding
is None:
2283 def encodeFilename(s
, for_subprocess
=False):
2285 @param s The name of the file
2288 assert type(s
) == compat_str
2290 # Python 3 has a Unicode API
2291 if sys
.version_info
>= (3, 0):
2294 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2295 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2296 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2297 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2300 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2301 if sys
.platform
.startswith('java'):
2304 return s
.encode(get_subprocess_encoding(), 'ignore')
2307 def decodeFilename(b
, for_subprocess
=False):
2309 if sys
.version_info
>= (3, 0):
2312 if not isinstance(b
, bytes):
2315 return b
.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a command-line argument for handing to a subprocess."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings; decode so the rest of the
        # pipeline only ever sees text.
        # TODO: assert on non-text input once all post processors are fixed:
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
def decodeArgument(b):
    """Decode a command-line argument coming back from a subprocess."""
    return decodeFilename(b, for_subprocess=True)
def decodeOption(optval):
    """Decode a CLI option value to text using the locale's preferred encoding."""
    if optval is None:
        return optval
    decoded = (optval.decode(preferredencoding())
               if isinstance(optval, bytes) else optval)
    assert isinstance(decoded, compat_str)
    return decoded
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as H:MM:SS / M:SS / S.

    @param secs   duration in seconds (int or float)
    @param delim  separator between hour/minute/second components
    @param msec   if True, append '.mmm' milliseconds
    """
    if secs > 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs > 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    # Bug fix: 'secs % 1' is a fraction < 1, which '%03d' truncates to 0,
    # so milliseconds always rendered as '.000'. Scale to ms before formatting.
    return '%s.%03d' % (ret, secs % 1 * 1000) if msec else ret
2351 def make_HTTPS_handler(params
, **kwargs
):
2352 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
2353 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
2354 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
2355 if opts_no_check_certificate
:
2356 context
.check_hostname
= False
2357 context
.verify_mode
= ssl
.CERT_NONE
2359 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2362 # (create_default_context present but HTTPSHandler has no context=)
2365 if sys
.version_info
< (3, 2):
2366 return YoutubeDLHTTPSHandler(params
, **kwargs
)
2367 else: # Python < 3.4
2368 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
2369 context
.verify_mode
= (ssl
.CERT_NONE
2370 if opts_no_check_certificate
2371 else ssl
.CERT_REQUIRED
)
2372 context
.set_default_verify_paths()
2373 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
def bug_reports_message(before=';'):
    """Return the standard 'please report this issue' boilerplate, to be
    appended after the text *before* (capitalized if it starts a sentence)."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ''.join((
        'please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # The message starts a new sentence, so capitalize its first word.
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg
class YoutubeDLError(Exception):
    """Root of the yt-dlp exception hierarchy; all custom errors derive from it."""
    pass
2397 network_exceptions
= [compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
]
2398 if hasattr(ssl
, 'CertificateError'):
2399 network_exceptions
.append(ssl
.CertificateError
)
2400 network_exceptions
= tuple(network_exceptions
)
2403 class ExtractorError(YoutubeDLError
):
2404 """Error during info extraction."""
2406 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
2407 """ tb, if given, is the original traceback (so that it can be printed out).
2408 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
2410 if sys
.exc_info()[0] in network_exceptions
:
2415 self
.expected
= expected
2417 self
.video_id
= video_id
2419 self
.exc_info
= sys
.exc_info() # preserve original exception
2421 super(ExtractorError
, self
).__init
__(''.join((
2422 format_field(ie
, template
='[%s] '),
2423 format_field(video_id
, template
='%s: '),
2425 format_field(cause
, template
=' (caused by %r)'),
2426 '' if expected
else bug_reports_message())))
2428 def format_traceback(self
):
2429 if self
.traceback
is None:
2431 return ''.join(traceback
.format_tb(self
.traceback
))
class UnsupportedError(ExtractorError):
    """Raised when no extractor claims the given URL."""

    def __init__(self, url):
        # expected=True: this is a normal user-facing condition, so the
        # base class skips the bug-report boilerplate.
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        # NOTE(review): upstream also stores the URL on the instance
        # (self.url = url) on a line elided from this chunk — confirm.
2441 class RegexNotFoundError(ExtractorError
):
2442 """Error when a regex didn't match"""
2446 class GeoRestrictedError(ExtractorError
):
2447 """Geographic restriction Error exception.
2449 This exception may be thrown when a video is not available from your
2450 geographic location due to geographic restrictions imposed by a website.
2453 def __init__(self
, msg
, countries
=None):
2454 super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True)
2456 self
.countries
= countries
2459 class DownloadError(YoutubeDLError
):
2460 """Download Error exception.
2462 This exception may be thrown by FileDownloader objects if they are not
2463 configured to continue on errors. They will contain the appropriate
2467 def __init__(self
, msg
, exc_info
=None):
2468 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2469 super(DownloadError
, self
).__init
__(msg
)
2470 self
.exc_info
= exc_info
2473 class EntryNotInPlaylist(YoutubeDLError
):
2474 """Entry not in playlist exception.
2476 This exception will be thrown by YoutubeDL when a requested entry
2477 is not found in the playlist info_dict
2482 class SameFileError(YoutubeDLError
):
2483 """Same File exception.
2485 This exception will be thrown by FileDownloader objects if they detect
2486 multiple files would have to be downloaded to the same file on disk.
2491 class PostProcessingError(YoutubeDLError
):
2492 """Post Processing exception.
2494 This exception may be raised by PostProcessor's .run() method to
2495 indicate an error in the postprocessing task.
2498 def __init__(self
, msg
):
2499 super(PostProcessingError
, self
).__init
__(msg
)
2503 class ExistingVideoReached(YoutubeDLError
):
2504 """ --max-downloads limit has been reached. """
2508 class RejectedVideoReached(YoutubeDLError
):
2509 """ --max-downloads limit has been reached. """
2513 class ThrottledDownload(YoutubeDLError
):
2514 """ Download speed below --throttled-rate. """
2518 class MaxDownloadsReached(YoutubeDLError
):
2519 """ --max-downloads limit has been reached. """
2523 class UnavailableVideoError(YoutubeDLError
):
2524 """Unavailable Format exception.
2526 This exception will be thrown when a video is requested
2527 in a format that is not available for that video.
2532 class ContentTooShortError(YoutubeDLError
):
2533 """Content Too Short exception.
2535 This exception may be raised by FileDownloader objects when a file they
2536 download is too small for what the server announced first, indicating
2537 the connection was probably interrupted.
2540 def __init__(self
, downloaded
, expected
):
2541 super(ContentTooShortError
, self
).__init
__(
2542 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
)
2545 self
.downloaded
= downloaded
2546 self
.expected
= expected
2549 class XAttrMetadataError(YoutubeDLError
):
2550 def __init__(self
, code
=None, msg
='Unknown error'):
2551 super(XAttrMetadataError
, self
).__init
__(msg
)
2555 # Parsing code and msg
2556 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2557 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
2558 self
.reason
= 'NO_SPACE'
2559 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2560 self
.reason
= 'VALUE_TOO_LONG'
2562 self
.reason
= 'NOT_SUPPORTED'
2565 class XAttrUnavailableError(YoutubeDLError
):
2569 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2570 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2571 # expected HTTP responses to meet HTTP/1.0 or later (see also
2572 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2573 if sys
.version_info
< (3, 0):
2574 kwargs
['strict'] = True
2575 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2576 source_address
= ydl_handler
._params
.get('source_address')
2578 if source_address
is not None:
2579 # This is to workaround _create_connection() from socket where it will try all
2580 # address data from getaddrinfo() including IPv6. This filters the result from
2581 # getaddrinfo() based on the source_address value.
2582 # This is based on the cpython socket.create_connection() function.
2583 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2584 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2585 host
, port
= address
2587 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2588 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2589 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2590 if addrs
and not ip_addrs
:
2591 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2593 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2594 % (ip_version
, source_address
[0]))
2595 for res
in ip_addrs
:
2596 af
, socktype
, proto
, canonname
, sa
= res
2599 sock
= socket
.socket(af
, socktype
, proto
)
2600 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2601 sock
.settimeout(timeout
)
2602 sock
.bind(source_address
)
2604 err
= None # Explicitly break reference cycle
2606 except socket
.error
as _
:
2608 if sock
is not None:
2613 raise socket
.error('getaddrinfo returns an empty list')
2614 if hasattr(hc
, '_create_connection'):
2615 hc
._create
_connection
= _create_connection
2616 sa
= (source_address
, 0)
2617 if hasattr(hc
, 'source_address'): # Python 2.7+
2618 hc
.source_address
= sa
2620 def _hc_connect(self
, *args
, **kwargs
):
2621 sock
= _create_connection(
2622 (self
.host
, self
.port
), self
.timeout
, sa
)
2624 self
.sock
= ssl
.wrap_socket(
2625 sock
, self
.key_file
, self
.cert_file
,
2626 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2629 hc
.connect
= functools
.partial(_hc_connect
, hc
)
2634 def handle_youtubedl_headers(headers
):
2635 filtered_headers
= headers
2637 if 'Youtubedl-no-compression' in filtered_headers
:
2638 filtered_headers
= dict((k
, v
) for k
, v
in filtered_headers
.items() if k
.lower() != 'accept-encoding')
2639 del filtered_headers
['Youtubedl-no-compression']
2641 return filtered_headers
2644 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2645 """Handler for HTTP requests and responses.
2647 This class, when installed with an OpenerDirector, automatically adds
2648 the standard headers to every HTTP request and handles gzipped and
2649 deflated responses from web servers. If compression is to be avoided in
2650 a particular request, the original request in the program code only has
2651 to include the HTTP header "Youtubedl-no-compression", which will be
2652 removed before making the real request.
2654 Part of this code was copied from:
2656 http://techknack.net/python-urllib2-handlers/
2658 Andrew Rowls, the author of that code, agreed to release it to the
2662 def __init__(self
, params
, *args
, **kwargs
):
2663 compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
)
2664 self
._params
= params
2666 def http_open(self
, req
):
2667 conn_class
= compat_http_client
.HTTPConnection
2669 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2671 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2672 del req
.headers
['Ytdl-socks-proxy']
2674 return self
.do_open(functools
.partial(
2675 _create_http_connection
, self
, conn_class
, False),
2683 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2685 return zlib
.decompress(data
)
2687 def http_request(self
, req
):
2688 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2689 # always respected by websites, some tend to give out URLs with non percent-encoded
2690 # non-ASCII characters (see telemb.py, ard.py [#3412])
2691 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2692 # To work around aforementioned issue we will replace request's original URL with
2693 # percent-encoded one
2694 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2695 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2696 url
= req
.get_full_url()
2697 url_escaped
= escape_url(url
)
2699 # Substitute URL if any change after escaping
2700 if url
!= url_escaped
:
2701 req
= update_Request(req
, url
=url_escaped
)
2703 for h
, v
in std_headers
.items():
2704 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2705 # The dict keys are capitalized because of this bug by urllib
2706 if h
.capitalize() not in req
.headers
:
2707 req
.add_header(h
, v
)
2709 req
.headers
= handle_youtubedl_headers(req
.headers
)
2711 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2712 # Python 2.6 is brain-dead when it comes to fragments
2713 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2714 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2718 def http_response(self
, req
, resp
):
2721 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2722 content
= resp
.read()
2723 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2725 uncompressed
= io
.BytesIO(gz
.read())
2726 except IOError as original_ioerror
:
2727 # There may be junk add the end of the file
2728 # See http://stackoverflow.com/q/4928560/35070 for details
2729 for i
in range(1, 1024):
2731 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2732 uncompressed
= io
.BytesIO(gz
.read())
2737 raise original_ioerror
2738 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2739 resp
.msg
= old_resp
.msg
2740 del resp
.headers
['Content-encoding']
2742 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2743 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2744 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2745 resp
.msg
= old_resp
.msg
2746 del resp
.headers
['Content-encoding']
2747 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2748 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2749 if 300 <= resp
.code
< 400:
2750 location
= resp
.headers
.get('Location')
2752 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2753 if sys
.version_info
>= (3, 0):
2754 location
= location
.encode('iso-8859-1').decode('utf-8')
2756 location
= location
.decode('utf-8')
2757 location_escaped
= escape_url(location
)
2758 if location
!= location_escaped
:
2759 del resp
.headers
['Location']
2760 if sys
.version_info
< (3, 0):
2761 location_escaped
= location_escaped
.encode('utf-8')
2762 resp
.headers
['Location'] = location_escaped
2765 https_request
= http_request
2766 https_response
= http_response
2769 def make_socks_conn_class(base_class
, socks_proxy
):
2770 assert issubclass(base_class
, (
2771 compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
))
2773 url_components
= compat_urlparse
.urlparse(socks_proxy
)
2774 if url_components
.scheme
.lower() == 'socks5':
2775 socks_type
= ProxyType
.SOCKS5
2776 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
2777 socks_type
= ProxyType
.SOCKS4
2778 elif url_components
.scheme
.lower() == 'socks4a':
2779 socks_type
= ProxyType
.SOCKS4A
2781 def unquote_if_non_empty(s
):
2784 return compat_urllib_parse_unquote_plus(s
)
2788 url_components
.hostname
, url_components
.port
or 1080,
2790 unquote_if_non_empty(url_components
.username
),
2791 unquote_if_non_empty(url_components
.password
),
2794 class SocksConnection(base_class
):
2796 self
.sock
= sockssocket()
2797 self
.sock
.setproxy(*proxy_args
)
2798 if type(self
.timeout
) in (int, float):
2799 self
.sock
.settimeout(self
.timeout
)
2800 self
.sock
.connect((self
.host
, self
.port
))
2802 if isinstance(self
, compat_http_client
.HTTPSConnection
):
2803 if hasattr(self
, '_context'): # Python > 2.6
2804 self
.sock
= self
._context
.wrap_socket(
2805 self
.sock
, server_hostname
=self
.host
)
2807 self
.sock
= ssl
.wrap_socket(self
.sock
)
2809 return SocksConnection
2812 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
2813 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
2814 compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
2815 self
._https
_conn
_class
= https_conn_class
or compat_http_client
.HTTPSConnection
2816 self
._params
= params
2818 def https_open(self
, req
):
2820 conn_class
= self
._https
_conn
_class
2822 if hasattr(self
, '_context'): # python > 2.6
2823 kwargs
['context'] = self
._context
2824 if hasattr(self
, '_check_hostname'): # python 3.x
2825 kwargs
['check_hostname'] = self
._check
_hostname
2827 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2829 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2830 del req
.headers
['Ytdl-socks-proxy']
2832 return self
.do_open(functools
.partial(
2833 _create_http_connection
, self
, conn_class
, True),
2837 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
):
2839 See [1] for cookie file format.
2841 1. https://curl.haxx.se/docs/http-cookies.html
2843 _HTTPONLY_PREFIX
= '#HttpOnly_'
2845 _HEADER
= '''# Netscape HTTP Cookie File
2846 # This file is generated by yt-dlp. Do not edit.
2849 _CookieFileEntry
= collections
.namedtuple(
2851 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2853 def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2855 Save cookies to a file.
2857 Most of the code is taken from CPython 3.8 and slightly adapted
2858 to support cookie files with UTF-8 in both python 2 and 3.
2860 if filename
is None:
2861 if self
.filename
is not None:
2862 filename
= self
.filename
2864 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2866 # Store session cookies with `expires` set to 0 instead of an empty
2869 if cookie
.expires
is None:
2872 with io
.open(filename
, 'w', encoding
='utf-8') as f
:
2873 f
.write(self
._HEADER
)
2876 if not ignore_discard
and cookie
.discard
:
2878 if not ignore_expires
and cookie
.is_expired(now
):
2884 if cookie
.domain
.startswith('.'):
2885 initial_dot
= 'TRUE'
2887 initial_dot
= 'FALSE'
2888 if cookie
.expires
is not None:
2889 expires
= compat_str(cookie
.expires
)
2892 if cookie
.value
is None:
2893 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2894 # with no name, whereas http.cookiejar regards it as a
2895 # cookie with no value.
2900 value
= cookie
.value
2902 '\t'.join([cookie
.domain
, initial_dot
, cookie
.path
,
2903 secure
, expires
, name
, value
]) + '\n')
2905 def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2906 """Load cookies from a file."""
2907 if filename
is None:
2908 if self
.filename
is not None:
2909 filename
= self
.filename
2911 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2913 def prepare_line(line
):
2914 if line
.startswith(self
._HTTPONLY
_PREFIX
):
2915 line
= line
[len(self
._HTTPONLY
_PREFIX
):]
2916 # comments and empty lines are fine
2917 if line
.startswith('#') or not line
.strip():
2919 cookie_list
= line
.split('\t')
2920 if len(cookie_list
) != self
._ENTRY
_LEN
:
2921 raise compat_cookiejar
.LoadError('invalid length %d' % len(cookie_list
))
2922 cookie
= self
._CookieFileEntry
(*cookie_list
)
2923 if cookie
.expires_at
and not cookie
.expires_at
.isdigit():
2924 raise compat_cookiejar
.LoadError('invalid expires at %s' % cookie
.expires_at
)
2928 with io
.open(filename
, encoding
='utf-8') as f
:
2931 cf
.write(prepare_line(line
))
2932 except compat_cookiejar
.LoadError
as e
:
2934 'WARNING: skipping cookie file entry due to %s: %r\n'
2935 % (e
, line
), sys
.stderr
)
2938 self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
)
2939 # Session cookies are denoted by either `expires` field set to
2940 # an empty string or 0. MozillaCookieJar only recognizes the former
2941 # (see [1]). So we need force the latter to be recognized as session
2942 # cookies on our own.
2943 # Session cookies may be important for cookies-based authentication,
2944 # e.g. usually, when user does not check 'Remember me' check box while
2945 # logging in on a site, some important cookies are stored as session
2946 # cookies so that not recognizing them will result in failed login.
2947 # 1. https://bugs.python.org/issue17164
2949 # Treat `expires=0` cookies as session cookies
2950 if cookie
.expires
== 0:
2951 cookie
.expires
= None
2952 cookie
.discard
= True
2955 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
):
2956 def __init__(self
, cookiejar
=None):
2957 compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
)
2959 def http_response(self
, request
, response
):
2960 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2961 # characters in Set-Cookie HTTP header of last response (see
2962 # https://github.com/ytdl-org/youtube-dl/issues/6769).
2963 # In order to at least prevent crashing we will percent encode Set-Cookie
2964 # header before HTTPCookieProcessor starts processing it.
2965 # if sys.version_info < (3, 0) and response.headers:
2966 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2967 # set_cookie = response.headers.get(set_cookie_header)
2969 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2970 # if set_cookie != set_cookie_escaped:
2971 # del response.headers[set_cookie_header]
2972 # response.headers[set_cookie_header] = set_cookie_escaped
2973 return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
)
2975 https_request
= compat_urllib_request
.HTTPCookieProcessor
.http_request
2976 https_response
= http_response
2979 class YoutubeDLRedirectHandler(compat_urllib_request
.HTTPRedirectHandler
):
2980 """YoutubeDL redirect handler
2982 The code is based on HTTPRedirectHandler implementation from CPython [1].
2984 This redirect handler solves two issues:
2985 - ensures redirect URL is always unicode under python 2
2986 - introduces support for experimental HTTP response status code
2987 308 Permanent Redirect [2] used by some sites [3]
2989 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2990 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2991 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2994 http_error_301
= http_error_303
= http_error_307
= http_error_308
= compat_urllib_request
.HTTPRedirectHandler
.http_error_302
2996 def redirect_request(self
, req
, fp
, code
, msg
, headers
, newurl
):
2997 """Return a Request or None in response to a redirect.
2999 This is called by the http_error_30x methods when a
3000 redirection response is received. If a redirection should
3001 take place, return a new Request to allow http_error_30x to
3002 perform the redirect. Otherwise, raise HTTPError if no-one
3003 else should try to handle this url. Return None if you can't
3004 but another Handler might.
3006 m
= req
.get_method()
3007 if (not (code
in (301, 302, 303, 307, 308) and m
in ("GET", "HEAD")
3008 or code
in (301, 302, 303) and m
== "POST")):
3009 raise compat_HTTPError(req
.full_url
, code
, msg
, headers
, fp
)
3010 # Strictly (according to RFC 2616), 301 or 302 in response to
3011 # a POST MUST NOT cause a redirection without confirmation
3012 # from the user (of urllib.request, in this case). In practice,
3013 # essentially all clients do redirect in this case, so we do
3016 # On python 2 urlh.geturl() may sometimes return redirect URL
3017 # as byte string instead of unicode. This workaround allows
3018 # to force it always return unicode.
3019 if sys
.version_info
[0] < 3:
3020 newurl
= compat_str(newurl
)
3022 # Be conciliant with URIs containing a space. This is mainly
3023 # redundant with the more complete encoding done in http_error_302(),
3024 # but it is kept for compatibility with other callers.
3025 newurl
= newurl
.replace(' ', '%20')
3027 CONTENT_HEADERS
= ("content-length", "content-type")
3028 # NB: don't use dict comprehension for python 2.6 compatibility
3029 newheaders
= dict((k
, v
) for k
, v
in req
.headers
.items()
3030 if k
.lower() not in CONTENT_HEADERS
)
3031 return compat_urllib_request
.Request(
3032 newurl
, headers
=newheaders
, origin_req_host
=req
.origin_req_host
,
3036 def extract_timezone(date_str
):
3039 ^.{8,}? # >=8 char non-TZ prefix, if present
3040 (?P<tz>Z| # just the UTC Z, or
3041 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3042 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3043 [ ]? # optional space
3044 (?P<sign>\+|-) # +/-
3045 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3049 timezone
= datetime
.timedelta()
3051 date_str
= date_str
[:-len(m
.group('tz'))]
3052 if not m
.group('sign'):
3053 timezone
= datetime
.timedelta()
3055 sign
= 1 if m
.group('sign') == '+' else -1
3056 timezone
= datetime
.timedelta(
3057 hours
=sign
* int(m
.group('hours')),
3058 minutes
=sign
* int(m
.group('minutes')))
3059 return timezone
, date_str
3062 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
3063 """ Return a UNIX timestamp from the given date """
3065 if date_str
is None:
3068 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
3070 if timezone
is None:
3071 timezone
, date_str
= extract_timezone(date_str
)
3074 date_format
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
)
3075 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
3076 return calendar
.timegm(dt
.timetuple())
3081 def date_formats(day_first
=True):
3082 return DATE_FORMATS_DAY_FIRST
if day_first
else DATE_FORMATS_MONTH_FIRST
3085 def unified_strdate(date_str
, day_first
=True):
3086 """Return a string with the date in the format YYYYMMDD"""
3088 if date_str
is None:
3092 date_str
= date_str
.replace(',', ' ')
3093 # Remove AM/PM + timezone
3094 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
3095 _
, date_str
= extract_timezone(date_str
)
3097 for expression
in date_formats(day_first
):
3099 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
3102 if upload_date
is None:
3103 timetuple
= email
.utils
.parsedate_tz(date_str
)
3106 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
3109 if upload_date
is not None:
3110 return compat_str(upload_date
)
3113 def unified_timestamp(date_str
, day_first
=True):
3114 if date_str
is None:
3117 date_str
= re
.sub(r
'[,|]', '', date_str
)
3119 pm_delta
= 12 if re
.search(r
'(?i)PM', date_str
) else 0
3120 timezone
, date_str
= extract_timezone(date_str
)
3122 # Remove AM/PM + timezone
3123 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
3125 # Remove unrecognized timezones from ISO 8601 alike timestamps
3126 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
3128 date_str
= date_str
[:-len(m
.group('tz'))]
3130 # Python only supports microseconds, so remove nanoseconds
3131 m
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
)
3133 date_str
= m
.group(1)
3135 for expression
in date_formats(day_first
):
3137 dt
= datetime
.datetime
.strptime(date_str
, expression
) - timezone
+ datetime
.timedelta(hours
=pm_delta
)
3138 return calendar
.timegm(dt
.timetuple())
3141 timetuple
= email
.utils
.parsedate_tz(date_str
)
3143 return calendar
.timegm(timetuple
) + pm_delta
* 3600
3146 def determine_ext(url
, default_ext
='unknown_video'):
3147 if url
is None or '.' not in url
:
3149 guess
= url
.partition('?')[0].rpartition('.')[2]
3150 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
3152 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3153 elif guess
.rstrip('/') in KNOWN_EXTENSIONS
:
3154 return guess
.rstrip('/')
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Derive a subtitle file name by swapping the extension for '<lang>.<format>'."""
    sub_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, sub_ext, expected_real_ext)
3163 def datetime_from_str(date_str
, precision
='auto', format
='%Y%m%d'):
3165 Return a datetime object from a string in the format YYYYMMDD or
3166 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3168 format: string date format used to return datetime object from
3169 precision: round the time portion of a datetime object.
3170 auto|microsecond|second|minute|hour|day.
3171 auto: round to the unit provided in date_str (if applicable).
3173 auto_precision
= False
3174 if precision
== 'auto':
3175 auto_precision
= True
3176 precision
= 'microsecond'
3177 today
= datetime_round(datetime
.datetime
.now(), precision
)
3178 if date_str
in ('now', 'today'):
3180 if date_str
== 'yesterday':
3181 return today
- datetime
.timedelta(days
=1)
3183 r
'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3185 if match
is not None:
3186 start_time
= datetime_from_str(match
.group('start'), precision
, format
)
3187 time
= int(match
.group('time')) * (-1 if match
.group('sign') == '-' else 1)
3188 unit
= match
.group('unit')
3189 if unit
== 'month' or unit
== 'year':
3190 new_date
= datetime_add_months(start_time
, time
* 12 if unit
== 'year' else time
)
3196 delta
= datetime
.timedelta(**{unit + 's': time}
)
3197 new_date
= start_time
+ delta
3199 return datetime_round(new_date
, unit
)
3202 return datetime_round(datetime
.datetime
.strptime(date_str
, format
), precision
)
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()
3215 def datetime_add_months(dt
, months
):
3216 """Increment/Decrement a datetime object by months."""
3217 month
= dt
.month
+ months
- 1
3218 year
= dt
.year
+ month
// 12
3219 month
= month
% 12 + 1
3220 day
= min(dt
.day
, calendar
.monthrange(year
, month
)[1])
3221 return dt
.replace(year
, month
, day
)
3224 def datetime_round(dt
, precision
='day'):
3226 Round a datetime object's time to a specific precision
3228 if precision
== 'microsecond':
3237 roundto
= lambda x
, n
: ((x
+ n
/ 2) // n
) * n
3238 timestamp
= calendar
.timegm(dt
.timetuple())
3239 return datetime
.datetime
.utcfromtimestamp(roundto(timestamp
, unit_seconds
[precision
]))
3242 def hyphenate_date(date_str
):
3244 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3245 match
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
)
3246 if match
is not None:
3247 return '-'.join(match
.groups())
3252 class DateRange(object):
3253 """Represents a time interval between two dates"""
3255 def __init__(self
, start
=None, end
=None):
3256 """start and end must be strings in the format accepted by date"""
3257 if start
is not None:
3258 self
.start
= date_from_str(start
)
3260 self
.start
= datetime
.datetime
.min.date()
3262 self
.end
= date_from_str(end
)
3264 self
.end
= datetime
.datetime
.max.date()
3265 if self
.start
> self
.end
:
3266 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
3270 """Returns a range that only contains the given day"""
3271 return cls(day
, day
)
3273 def __contains__(self
, date
):
3274 """Check if the date is in the range"""
3275 if not isinstance(date
, datetime
.date
):
3276 date
= date_from_str(date
)
3277 return self
.start
<= date
<= self
.end
3280 return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat())
3283 def platform_name():
3284 """ Returns the platform name as a compat_str """
3285 res
= platform
.platform()
3286 if isinstance(res
, bytes):
3287 res
= res
.decode(preferredencoding())
3289 assert isinstance(res
, compat_str
)
3293 def _windows_write_string(s
, out
):
3294 """ Returns True if the string was written using special methods,
3295 False if it has yet to be written out."""
3296 # Adapted from http://stackoverflow.com/a/3259271/35070
3299 import ctypes
.wintypes
3307 fileno
= out
.fileno()
3308 except AttributeError:
3309 # If the output stream doesn't have a fileno, it's virtual
3311 except io
.UnsupportedOperation
:
3312 # Some strange Windows pseudo files?
3314 if fileno
not in WIN_OUTPUT_IDS
:
3317 GetStdHandle
= compat_ctypes_WINFUNCTYPE(
3318 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
3319 ('GetStdHandle', ctypes
.windll
.kernel32
))
3320 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
3322 WriteConsoleW
= compat_ctypes_WINFUNCTYPE(
3323 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
3324 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
3325 ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
))
3326 written
= ctypes
.wintypes
.DWORD(0)
3328 GetFileType
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
))
3329 FILE_TYPE_CHAR
= 0x0002
3330 FILE_TYPE_REMOTE
= 0x8000
3331 GetConsoleMode
= compat_ctypes_WINFUNCTYPE(
3332 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
3333 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
3334 ('GetConsoleMode', ctypes
.windll
.kernel32
))
3335 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
3337 def not_a_console(handle
):
3338 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
3340 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
3341 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
3343 if not_a_console(h
):
3346 def next_nonbmp_pos(s
):
3348 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
3349 except StopIteration:
3353 count
= min(next_nonbmp_pos(s
), 1024)
3355 ret
= WriteConsoleW(
3356 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
3358 raise OSError('Failed to write string')
3359 if not count
: # We just wrote a non-BMP character
3360 assert written
.value
== 2
3363 assert written
.value
> 0
3364 s
= s
[written
.value
:]
3368 def write_string(s
, out
=None, encoding
=None):
3371 assert type(s
) == compat_str
3373 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
3374 if _windows_write_string(s
, out
):
3377 if ('b' in getattr(out
, 'mode', '')
3378 or sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
3379 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
3381 elif hasattr(out
, 'buffer'):
3382 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
3383 byt
= s
.encode(enc
, 'ignore')
3384 out
.buffer.write(byt
)
3390 def bytes_to_intlist(bs
):
3393 if isinstance(bs
[0], int): # Python 3
3396 return [ord(c
) for c
in bs
]
3399 def intlist_to_bytes(xs
):
3402 return compat_struct_pack('%dB' % len(xs
), *xs
)
3405 # Cross-platform file locking
3406 if sys
.platform
== 'win32':
3407 import ctypes
.wintypes
3410 class OVERLAPPED(ctypes
.Structure
):
3412 ('Internal', ctypes
.wintypes
.LPVOID
),
3413 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
3414 ('Offset', ctypes
.wintypes
.DWORD
),
3415 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
3416 ('hEvent', ctypes
.wintypes
.HANDLE
),
3419 kernel32
= ctypes
.windll
.kernel32
3420 LockFileEx
= kernel32
.LockFileEx
3421 LockFileEx
.argtypes
= [
3422 ctypes
.wintypes
.HANDLE
, # hFile
3423 ctypes
.wintypes
.DWORD
, # dwFlags
3424 ctypes
.wintypes
.DWORD
, # dwReserved
3425 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3426 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3427 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3429 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
3430 UnlockFileEx
= kernel32
.UnlockFileEx
3431 UnlockFileEx
.argtypes
= [
3432 ctypes
.wintypes
.HANDLE
, # hFile
3433 ctypes
.wintypes
.DWORD
, # dwReserved
3434 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3435 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3436 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3438 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
3439 whole_low
= 0xffffffff
3440 whole_high
= 0x7fffffff
3442 def _lock_file(f
, exclusive
):
3443 overlapped
= OVERLAPPED()
3444 overlapped
.Offset
= 0
3445 overlapped
.OffsetHigh
= 0
3446 overlapped
.hEvent
= 0
3447 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
3448 handle
= msvcrt
.get_osfhandle(f
.fileno())
3449 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
3450 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3451 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
3453 def _unlock_file(f
):
3454 assert f
._lock
_file
_overlapped
_p
3455 handle
= msvcrt
.get_osfhandle(f
.fileno())
3456 if not UnlockFileEx(handle
, 0,
3457 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3458 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
3461 # Some platforms, such as Jython, is missing fcntl
3465 def _lock_file(f
, exclusive
):
3466 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
3468 def _unlock_file(f
):
3469 fcntl
.flock(f
, fcntl
.LOCK_UN
)
3471 UNSUPPORTED_MSG
= 'file locking is not supported on this platform'
3473 def _lock_file(f
, exclusive
):
3474 raise IOError(UNSUPPORTED_MSG
)
3476 def _unlock_file(f
):
3477 raise IOError(UNSUPPORTED_MSG
)
3480 class locked_file(object):
3481 def __init__(self
, filename
, mode
, encoding
=None):
3482 assert mode
in ['r', 'a', 'w']
3483 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
3486 def __enter__(self
):
3487 exclusive
= self
.mode
!= 'r'
3489 _lock_file(self
.f
, exclusive
)
3495 def __exit__(self
, etype
, value
, traceback
):
3497 _unlock_file(self
.f
)
3504 def write(self
, *args
):
3505 return self
.f
.write(*args
)
3507 def read(self
, *args
):
3508 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' when Python reports none."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3516 def shell_quote(args
):
3518 encoding
= get_filesystem_encoding()
3520 if isinstance(a
, bytes):
3521 # We may get a filename encoded with 'encodeFilename'
3522 a
= a
.decode(encoding
)
3523 quoted_args
.append(compat_shlex_quote(a
))
3524 return ' '.join(quoted_args
)
3527 def smuggle_url(url
, data
):
3528 """ Pass additional data in a URL for internal use. """
3530 url
, idata
= unsmuggle_url(url
, {})
3532 sdata
= compat_urllib_parse_urlencode(
3533 {'__youtubedl_smuggle': json.dumps(data)}
)
3534 return url
+ '#' + sdata
3537 def unsmuggle_url(smug_url
, default
=None):
3538 if '#__youtubedl_smuggle' not in smug_url
:
3539 return smug_url
, default
3540 url
, _
, sdata
= smug_url
.rpartition('#')
3541 jsond
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0]
3542 data
= json
.loads(jsond
)
3546 def format_bytes(bytes):
3549 if type(bytes) is str:
3550 bytes = float(bytes)
3554 exponent
= int(math
.log(bytes, 1024.0))
3555 suffix
= ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent
]
3556 converted
= float(bytes) / float(1024 ** exponent
)
3557 return '%.2f%s' % (converted
, suffix
)
3560 def lookup_unit_table(unit_table
, s
):
3561 units_re
= '|'.join(re
.escape(u
) for u
in unit_table
)
3563 r
'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re
, s
)
3566 num_str
= m
.group('num').replace(',', '.')
3567 mult
= unit_table
[m
.group('unit')]
3568 return int(float(num_str
) * mult
)
3571 def parse_filesize(s
):
3575 # The lower-case forms are of course incorrect and unofficial,
3576 # but we support those too
3593 'megabytes': 1000 ** 2,
3594 'mebibytes': 1024 ** 2,
3600 'gigabytes': 1000 ** 3,
3601 'gibibytes': 1024 ** 3,
3607 'terabytes': 1000 ** 4,
3608 'tebibytes': 1024 ** 4,
3614 'petabytes': 1000 ** 5,
3615 'pebibytes': 1024 ** 5,
3621 'exabytes': 1000 ** 6,
3622 'exbibytes': 1024 ** 6,
3628 'zettabytes': 1000 ** 7,
3629 'zebibytes': 1024 ** 7,
3635 'yottabytes': 1000 ** 8,
3636 'yobibytes': 1024 ** 8,
3639 return lookup_unit_table(_UNIT_TABLE
, s
)
3648 if re
.match(r
'^[\d,.]+$', s
):
3649 return str_to_int(s
)
3660 return lookup_unit_table(_UNIT_TABLE
, s
)
3663 def parse_resolution(s
):
3667 mobj
= re
.search(r
'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s
)
3670 'width': int(mobj
.group('w')),
3671 'height': int(mobj
.group('h')),
3674 mobj
= re
.search(r
'\b(\d+)[pPiI]\b', s
)
3676 return {'height': int(mobj.group(1))}
3678 mobj
= re
.search(r
'\b([48])[kK]\b', s
)
3680 return {'height': int(mobj.group(1)) * 540}
3685 def parse_bitrate(s
):
3686 if not isinstance(s
, compat_str
):
3688 mobj
= re
.search(r
'\b(\d+)\s*kbps', s
)
3690 return int(mobj
.group(1))
3693 def month_by_name(name
, lang
='en'):
3694 """ Return the number of a month by (locale-independently) English name """
3696 month_names
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en'])
3699 return month_names
.index(name
) + 1
3704 def month_by_abbreviation(abbrev
):
3705 """ Return the number of a month by (locale-independently) English
3709 return [s
[:3] for s
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1
3714 def fix_xml_ampersands(xml_str
):
3715 """Replace all the '&' by '&' in XML"""
3717 r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3722 def setproctitle(title
):
3723 assert isinstance(title
, compat_str
)
3725 # ctypes in Jython is not complete
3726 # http://bugs.jython.org/issue2148
3727 if sys
.platform
.startswith('java'):
3731 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
3735 # LoadLibrary in Windows Python 2.7.13 only expects
3736 # a bytestring, but since unicode_literals turns
3737 # every string into a unicode string, it fails.
3739 title_bytes
= title
.encode('utf-8')
3740 buf
= ctypes
.create_string_buffer(len(title_bytes
))
3741 buf
.value
= title_bytes
3743 libc
.prctl(15, buf
, 0, 0, 0)
3744 except AttributeError:
3745 return # Strange libc, just skip this
def remove_start(s, start):
    """Return *s* with a leading *start* stripped when present; None passes through."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Return *s* with a trailing *end* stripped when present; None passes through.

    Guards against an empty *end*: without the `end and` check,
    `s.endswith('')` is True and `s[:-0]` evaluates to `s[:0]` (''),
    silently discarding the whole string.
    """
    return s[:-len(end)] if s is not None and end and s.endswith(end) else s
3756 def remove_quotes(s
):
3757 if s
is None or len(s
) < 2:
3759 for quote
in ('"', "'", ):
3760 if s
[0] == quote
and s
[-1] == quote
:
def get_domain(url):
    """Extract the bare domain from *url* (scheme and leading 'www.' ignored), or None."""
    mobj = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if mobj:
        return mobj.group('domain')
    return None
def url_basename(url):
    """Return the last path segment of *url* ('' when the path is empty)."""
    parsed_path = compat_urlparse.urlparse(url).path
    segments = parsed_path.strip('/').split('/')
    return segments[-1]
.match(r
'https?://[^?#&]+/', url
).group()
3779 def urljoin(base
, path
):
3780 if isinstance(path
, bytes):
3781 path
= path
.decode('utf-8')
3782 if not isinstance(path
, compat_str
) or not path
:
3784 if re
.match(r
'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path
):
3786 if isinstance(base
, bytes):
3787 base
= base
.decode('utf-8')
3788 if not isinstance(base
, compat_str
) or not re
.match(
3789 r
'^(?:https?:)?//', base
):
3791 return compat_urlparse
.urljoin(base
, path
)
3794 class HEADRequest(compat_urllib_request
.Request
):
3795 def get_method(self
):
3799 class PUTRequest(compat_urllib_request
.Request
):
3800 def get_method(self
):
3804 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1):
3807 v
= getattr(v
, get_attr
, None)
3813 return int(v
) * invscale
// scale
3814 except (ValueError, TypeError):
3818 def str_or_none(v
, default
=None):
3819 return default
if v
is None else compat_str(v
)
3822 def str_to_int(int_str
):
3823 """ A more relaxed version of int_or_none """
3824 if isinstance(int_str
, compat_integer_types
):
3826 elif isinstance(int_str
, compat_str
):
3827 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
3828 return int_or_none(int_str
)
3831 def float_or_none(v
, scale
=1, invscale
=1, default
=None):
3835 return float(v
) * invscale
/ scale
3836 except (ValueError, TypeError):
def bool_or_none(v, default=None):
    """Return *v* only when it is an actual bool; everything else yields *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return v.strip() for string inputs; non-strings (incl. None) yield *default*."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3848 def url_or_none(url
):
3849 if not url
or not isinstance(url
, compat_str
):
3852 return url
if re
.match(r
'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url
) else None
3855 def strftime_or_none(timestamp
, date_format
, default
=None):
3856 datetime_object
= None
3858 if isinstance(timestamp
, compat_numeric_types
): # unix timestamp
3859 datetime_object
= datetime
.datetime
.utcfromtimestamp(timestamp
)
3860 elif isinstance(timestamp
, compat_str
): # assume YYYYMMDD
3861 datetime_object
= datetime
.datetime
.strptime(timestamp
, '%Y%m%d')
3862 return datetime_object
.strftime(date_format
)
3863 except (ValueError, TypeError, AttributeError):
3867 def parse_duration(s
):
3868 if not isinstance(s
, compat_basestring
):
3873 days
, hours
, mins
, secs
, ms
= [None] * 5
3874 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3876 days
, hours
, mins
, secs
, ms
= m
.groups()
3881 [0-9]+\s*y(?:ears?)?\s*
3884 [0-9]+\s*m(?:onths?)?\s*
3887 [0-9]+\s*w(?:eeks?)?\s*
3890 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3894 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3897 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3900 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3903 days
, hours
, mins
, secs
, ms
= m
.groups()
3905 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
3907 hours
, mins
= m
.groups()
3913 duration
+= float(secs
)
3915 duration
+= float(mins
) * 60
3917 duration
+= float(hours
) * 60 * 60
3919 duration
+= float(days
) * 24 * 60 * 60
3921 duration
+= float(ms
)
3925 def prepend_extension(filename
, ext
, expected_real_ext
=None):
3926 name
, real_ext
= os
.path
.splitext(filename
)
3928 '{0}.{1}{2}'.format(name
, ext
, real_ext
)
3929 if not expected_real_ext
or real_ext
[1:] == expected_real_ext
3930 else '{0}.{1}'.format(filename
, ext
))
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace *filename*'s extension with *ext*.

    When *expected_real_ext* is given and the current extension does not
    match it, *ext* is appended to the full original name instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3940 def check_executable(exe
, args
=[]):
3941 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3942 args can be a list of arguments for a short output (like -version) """
3944 process_communicate_or_kill(subprocess
.Popen(
3945 [exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
))
3951 def get_exe_version(exe
, args
=['--version'],
3952 version_re
=None, unrecognized
='present'):
3953 """ Returns the version of the specified executable,
3954 or False if the executable is not present """
3956 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3957 # SIGTTOU if yt-dlp is run in the background.
3958 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3959 out
, _
= process_communicate_or_kill(subprocess
.Popen(
3960 [encodeArgument(exe
)] + args
,
3961 stdin
=subprocess
.PIPE
,
3962 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
))
3965 if isinstance(out
, bytes): # Python 2.x
3966 out
= out
.decode('ascii', 'ignore')
3967 return detect_exe_version(out
, version_re
, unrecognized
)
3970 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
3971 assert isinstance(output
, compat_str
)
3972 if version_re
is None:
3973 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
3974 m
= re
.search(version_re
, output
)
3981 class LazyList(collections
.abc
.Sequence
):
3982 ''' Lazy immutable list from an iterable
3983 Note that slices of a LazyList are lists and not LazyList'''
3985 class IndexError(IndexError):
3988 def __init__(self
, iterable
):
3989 self
.__iterable
= iter(iterable
)
3991 self
.__reversed
= False
3995 # We need to consume the entire iterable to iterate in reverse
3996 yield from self
.exhaust()
3998 yield from self
.__cache
3999 for item
in self
.__iterable
:
4000 self
.__cache
.append(item
)
4003 def __exhaust(self
):
4004 self
.__cache
.extend(self
.__iterable
)
4008 ''' Evaluate the entire iterable '''
4009 return self
.__exhaust
()[::-1 if self
.__reversed
else 1]
4012 def __reverse_index(x
):
4013 return None if x
is None else -(x
+ 1)
4015 def __getitem__(self
, idx
):
4016 if isinstance(idx
, slice):
4018 idx
= slice(self
.__reverse
_index
(idx
.start
), self
.__reverse
_index
(idx
.stop
), -(idx
.step
or 1))
4019 start
, stop
, step
= idx
.start
, idx
.stop
, idx
.step
or 1
4020 elif isinstance(idx
, int):
4022 idx
= self
.__reverse
_index
(idx
)
4023 start
, stop
, step
= idx
, idx
, 0
4025 raise TypeError('indices must be integers or slices')
4026 if ((start
or 0) < 0 or (stop
or 0) < 0
4027 or (start
is None and step
< 0)
4028 or (stop
is None and step
> 0)):
4029 # We need to consume the entire iterable to be able to slice from the end
4030 # Obviously, never use this with infinite iterables
4033 return self
.__cache
[idx
]
4034 except IndexError as e
:
4035 raise self
.IndexError(e
) from e
4036 n
= max(start
or 0, stop
or 0) - len(self
.__cache
) + 1
4038 self
.__cache
.extend(itertools
.islice(self
.__iterable
, n
))
4040 return self
.__cache
[idx
]
4041 except IndexError as e
:
4042 raise self
.IndexError(e
) from e
4046 self
[-1] if self
.__reversed
else self
[0]
4047 except self
.IndexError:
4053 return len(self
.__cache
)
4056 self
.__reversed
= not self
.__reversed
4060 # repr and str should mimic a list. So we exhaust the iterable
4061 return repr(self
.exhaust())
4064 return repr(self
.exhaust())
4069 # This is only useful for tests
4070 return len(self
.getslice())
4072 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
4073 self
._pagefunc
= pagefunc
4074 self
._pagesize
= pagesize
4075 self
._use
_cache
= use_cache
4078 def getpage(self
, pagenum
):
4079 page_results
= self
._cache
.get(pagenum
) or list(self
._pagefunc
(pagenum
))
4081 self
._cache
[pagenum
] = page_results
4084 def getslice(self
, start
=0, end
=None):
4085 return list(self
._getslice
(start
, end
))
4087 def _getslice(self
, start
, end
):
4088 raise NotImplementedError('This method must be implemented by subclasses')
4090 def __getitem__(self
, idx
):
4091 # NOTE: cache must be enabled if this is used
4092 if not isinstance(idx
, int) or idx
< 0:
4093 raise TypeError('indices must be non-negative integers')
4094 entries
= self
.getslice(idx
, idx
+ 1)
4095 return entries
[0] if entries
else None
4098 class OnDemandPagedList(PagedList
):
4099 def _getslice(self
, start
, end
):
4100 for pagenum
in itertools
.count(start
// self
._pagesize
):
4101 firstid
= pagenum
* self
._pagesize
4102 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
4103 if start
>= nextfirstid
:
4107 start
% self
._pagesize
4108 if firstid
<= start
< nextfirstid
4111 ((end
- 1) % self
._pagesize
) + 1
4112 if (end
is not None and firstid
<= end
<= nextfirstid
)
4115 page_results
= self
.getpage(pagenum
)
4116 if startv
!= 0 or endv
is not None:
4117 page_results
= page_results
[startv
:endv
]
4118 yield from page_results
4120 # A little optimization - if current page is not "full", ie. does
4121 # not contain page_size videos then we can assume that this page
4122 # is the last one - there are no more ids on further pages -
4123 # i.e. no need to query again.
4124 if len(page_results
) + startv
< self
._pagesize
:
4127 # If we got the whole page, but the next page is not interesting,
4128 # break out early as well
4129 if end
== nextfirstid
:
4133 class InAdvancePagedList(PagedList
):
4134 def __init__(self
, pagefunc
, pagecount
, pagesize
):
4135 self
._pagecount
= pagecount
4136 PagedList
.__init
__(self
, pagefunc
, pagesize
, True)
4138 def _getslice(self
, start
, end
):
4139 start_page
= start
// self
._pagesize
4141 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
4142 skip_elems
= start
- start_page
* self
._pagesize
4143 only_more
= None if end
is None else end
- start
4144 for pagenum
in range(start_page
, end_page
):
4145 page_results
= self
.getpage(pagenum
)
4147 page_results
= page_results
[skip_elems
:]
4149 if only_more
is not None:
4150 if len(page_results
) < only_more
:
4151 only_more
-= len(page_results
)
4153 yield from page_results
[:only_more
]
4155 yield from page_results
def uppercase_escape(s):
    """Expand literal '\\UXXXXXXXX' escape sequences embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Expand literal '\\uXXXX' escape sequences embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() cannot handle unicode input, so pre-encode there
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The netloc is IDNA-encoded; every other component is percent-escaped
    return parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    ).geturl()
4194 return compat_parse_qs(compat_urllib_parse_urlparse(url
).query
)
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, one per line.

    BOM markers are stripped, and blank lines or lines starting with
    '#', ';' or ']' are treated as comments and dropped.  The file
    object is closed when done.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode the given query and return it as ASCII bytes, ready to POST."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
def update_url_query(url, query):
    """Return url with the key/value pairs from query merged into its query string."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
def update_Request(req, url=None, data=None, headers=None, query=None):
    """Clone a urllib request, optionally overriding URL, body, headers and query.

    The request method is preserved by picking the matching request class
    (HEAD/PUT/plain Request), and a `timeout` attribute, if the original
    request carries one, is copied over as well.
    """
    # Fix: avoid mutable default arguments ({}), which are shared between
    # calls; treat None as "no extra values" instead (backward compatible).
    headers = headers or {}
    query = query or {}
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def _multipart_encode_impl(data, boundary):
    """Serialize data (a dict) as multipart/form-data with the given boundary.

    Returns (payload_bytes, content_type).  Raises ValueError when the
    boundary string occurs inside one of the encoded parts.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    out = b''
    boundary_bytes = boundary.encode('ascii')
    for k, v in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            out, content_type = _multipart_encode_impl(data, boundary)
            break
        except ValueError:
            # A randomly generated boundary collided with the payload;
            # try another one.  A caller-supplied boundary is a hard error.
            if has_specified_boundary:
                raise
            boundary = None

    return out, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key from a list/tuple of keys.

    A candidate is skipped when missing, None, or (with skip_false_values)
    falsy; `default` is returned when nothing matches.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
def try_get(src, getter, expected_type=None):
    """Apply each getter callable to src and return the first result that
    neither raises a common lookup error nor fails the expected_type check.

    Returns None implicitly when no getter succeeds.
    """
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            pass
        else:
            if expected_type is None or isinstance(v, expected_type):
                return v
def merge_dicts(*dicts):
    """Merge dicts left-to-right.

    None values never overwrite anything; a non-empty string does replace a
    previously merged empty string for the same key.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if (k not in merged
                    or (isinstance(v, compat_str) and v
                        and isinstance(merged[k], compat_str)
                        and not merged[k])):
                merged[k] = v
    return merged
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string unchanged when it is already text; otherwise decode it
    with the given encoding/errors policy."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4350 TV_PARENTAL_GUIDELINES
= {
def parse_age_limit(s):
    """Parse an age limit from an int, an 'NN+' string, a US movie rating
    or a TV parental guideline; return an int age or None."""
    # NOTE(review): the exact-type check (not isinstance) presumably rejects
    # bools on purpose -- confirm before changing
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
def strip_jsonp(code):
    """Strip a JSONP callback wrapper from code, leaving the JSON payload.

    Input without a recognizable wrapper is returned unchanged.
    """
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4387 def js_to_json(code
, vars={}):
4388 # vars is a dict of var, val pairs to substitute
4389 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4390 SKIP_RE
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
)
4392 (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16),
4393 (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8),
4398 if v
in ('true', 'false', 'null'):
4400 elif v
in ('undefined', 'void 0'):
4402 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
4405 if v
[0] in ("'", '"'):
4406 v
= re
.sub(r
'(?s)\\.|"', lambda m
: {
4411 }.get(m
.group(0), m
.group(0)), v
[1:-1])
4413 for regex
, base
in INTEGER_TABLE
:
4414 im
= re
.match(regex
, v
)
4416 i
= int(im
.group(1), base
)
4417 return '"%d":' % i
if v
.endswith(':') else '%d' % i
4424 return re
.sub(r
'''(?sx)
4425 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4426 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4427 {comment}|,(?={skip}[\]}}])|
4428 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4429 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4432 '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    # The returned callable ranks a quality id by its position in
    # quality_ids; unknown ids rank -1 (below every known quality).
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4446 'default': '%(title)s [%(id)s].%(ext)s',
4447 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4453 'description': 'description',
4454 'annotation': 'annotations.xml',
4455 'infojson': 'info.json',
4456 'pl_thumbnail': None,
4457 'pl_description': 'description',
4458 'pl_infojson': 'info.json',
4461 # As of [1] format syntax is:
4462 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4463 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4464 STR_FORMAT_RE_TMPL
= r
'''(?x)
4465 (?<!%)(?P<prefix>(?:%%)*)
4467 (?P<has_key>\((?P<key>{0})\))? # mapping key
4469 (?:[#0\-+ ]+)? # conversion flags (optional)
4470 (?:\d+)? # minimum field width (optional)
4471 (?:\.\d+)? # precision (optional)
4472 [hlL]? # length modifier (optional)
4473 {1} # conversion type
4478 STR_FORMAT_TYPES
= 'diouxXeEfFgGcrs'
def limit_length(s, length):
    """Truncate s to at most `length` characters, ending in '...' when cut;
    None passes through unchanged."""
    ellipses = '...'
    if s is None:
        return None
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
def version_tuple(v):
    """Split a version string on '.' or '-' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
def is_outdated_version(version, limit, assume_new=True):
    """Compare two version strings; when the input is empty or unparsable,
    fall back on assume_new (True => treat as up to date)."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
4504 def ytdl_is_updateable():
4505 """ Returns if yt-dlp can be updated with -U """
4508 from zipimport
import zipimporter
4510 return isinstance(globals().get('__loader__'), zipimporter
) or hasattr(sys
, 'frozen')
def args_to_str(args):
    """Build a short, shell-quoted string representation of a subprocess
    command line (for display only)."""
    quoted = (compat_shlex_quote(a) for a in args)
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Return the message of an exception as a native text string."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
4527 def mimetype2ext(mt
):
4533 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4534 # it's the most popular one
4535 'audio/mpeg': 'mp3',
4536 'audio/x-wav': 'wav',
4541 _
, _
, res
= mt
.rpartition('/')
4542 res
= res
.split(';')[0].strip().lower()
4546 'smptett+xml': 'tt',
4550 'x-mp4-fragmented': 'mp4',
4551 'x-ms-sami': 'sami',
4554 'x-mpegurl': 'm3u8',
4555 'vnd.apple.mpegurl': 'm3u8',
4559 'vnd.ms-sstr+xml': 'ism',
4566 def parse_codecs(codecs_str
):
4567 # http://tools.ietf.org/html/rfc6381
4570 split_codecs
= list(filter(None, map(
4571 str.strip
, codecs_str
.strip().strip(',').split(','))))
4572 vcodec
, acodec
= None, None
4573 for full_codec
in split_codecs
:
4574 codec
= full_codec
.split('.')[0]
4575 if codec
in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4578 elif codec
in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4582 write_string('WARNING: Unknown codec %s\n' % full_codec
, sys
.stderr
)
4583 if not vcodec
and not acodec
:
4584 if len(split_codecs
) == 2:
4586 'vcodec': split_codecs
[0],
4587 'acodec': split_codecs
[1],
4591 'vcodec': vcodec
or 'none',
4592 'acodec': acodec
or 'none',
4597 def urlhandle_detect_ext(url_handle
):
4598 getheader
= url_handle
.headers
.get
4600 cd
= getheader('Content-Disposition')
4602 m
= re
.match(r
'attachment;\s*filename="(?P<filename>[^"]+)"', cd
)
4604 e
= determine_ext(m
.group('filename'), default_ext
=None)
4608 return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Pack binary data into a base64 'data:' URI with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer limit set, or no restriction on the content: never block
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Decode according to a leading BOM when present, else assume UTF-8
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
4645 def determine_protocol(info_dict
):
4646 protocol
= info_dict
.get('protocol')
4647 if protocol
is not None:
4650 url
= info_dict
['url']
4651 if url
.startswith('rtmp'):
4653 elif url
.startswith('mms'):
4655 elif url
.startswith('rtsp'):
4658 ext
= determine_ext(url
)
4664 return compat_urllib_parse_urlparse(url
).scheme
4667 def render_table(header_row
, data
, delim
=False, extraGap
=0, hideEmpty
=False):
4668 """ Render a list of rows, each as a list of values """
4670 def get_max_lens(table
):
4671 return [max(len(compat_str(v
)) for v
in col
) for col
in zip(*table
)]
4673 def filter_using_list(row
, filterArray
):
4674 return [col
for (take
, col
) in zip(filterArray
, row
) if take
]
4677 max_lens
= get_max_lens(data
)
4678 header_row
= filter_using_list(header_row
, max_lens
)
4679 data
= [filter_using_list(row
, max_lens
) for row
in data
]
4681 table
= [header_row
] + data
4682 max_lens
= get_max_lens(table
)
4684 table
= [header_row
] + [['-' * ml
for ml
in max_lens
]] + data
4685 format_str
= ' '.join('%-' + compat_str(ml
+ extraGap
) + 's' for ml
in max_lens
[:-1]) + ' %s'
4686 return '\n'.join(format_str
% tuple(row
) for row
in table
)
4689 def _match_one(filter_part
, dct
, incomplete
):
4690 # TODO: Generalize code with YoutubeDL._build_format_filter
4691 STRING_OPERATORS
= {
4692 '*=': operator
.contains
,
4693 '^=': lambda attr
, value
: attr
.startswith(value
),
4694 '$=': lambda attr
, value
: attr
.endswith(value
),
4695 '~=': lambda attr
, value
: re
.search(value
, attr
),
4697 COMPARISON_OPERATORS
= {
4699 '<=': operator
.le
, # "<=" must be defined above "<"
4706 operator_rex
= re
.compile(r
'''(?x)\s*
4708 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4710 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4711 (?P<quote>["\'])(?P
<quotedstrval
>.+?
)(?P
=quote
)|
4715 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4716 m = operator_rex.search(filter_part)
4718 unnegated_op = COMPARISON_OPERATORS[m.group('op')]
4719 if m.group('negation'):
4720 op = lambda attr, value: not unnegated_op(attr, value)
4723 actual_value = dct.get(m.group('key'))
4724 if (m.group('quotedstrval') is not None
4725 or m.group('strval') is not None
4726 # If the original field is a string and matching comparisonvalue is
4727 # a number we should respect the origin of the original field
4728 # and process comparison value as a string (see
4729 # https://github.com/ytdl-org/youtube-dl/issues/11082).
4730 or actual_value is not None and m.group('intval') is not None
4731 and isinstance(actual_value, compat_str)):
4732 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4733 quote = m.group('quote')
4734 if quote is not None:
4735 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4737 if m.group('op') in STRING_OPERATORS:
4738 raise ValueError('Operator %s only supports string values!' % m.group('op'))
4740 comparison_value = int(m.group('intval'))
4742 comparison_value = parse_filesize(m.group('intval'))
4743 if comparison_value is None:
4744 comparison_value = parse_filesize(m.group('intval') + 'B')
4745 if comparison_value is None:
4747 'Invalid integer value %r in filter part %r' % (
4748 m.group('intval'), filter_part))
4749 if actual_value is None:
4750 return incomplete or m.group('none_inclusive')
4751 return op(actual_value, comparison_value)
4754 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4755 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4757 operator_rex = re.compile(r'''(?x
)\s
*
4758 (?P
<op
>%s)\s
*(?P
<key
>[a
-z_
]+)
4760 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4761 m = operator_rex.search(filter_part)
4763 op = UNARY_OPERATORS[m.group('op')]
4764 actual_value = dct.get(m.group('key'))
4765 if incomplete and actual_value is None:
4767 return op(actual_value)
4769 raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
        When incomplete, all conditions passes on missing fields
    """
    # Conditions are '&'-separated; a literal '&' is written as '\&'
    parts = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(part.replace(r'\&', '&'), dct, incomplete)
        for part in parts)
def match_filter_func(filter_str):
    """Build a match-filter callable: it returns None when the video passes
    the filter, or a human-readable skip message otherwise."""
    def _match_func(info_dict, *args, **kwargs):
        if match_str(filter_str, info_dict, *args, **kwargs):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float); returns None
    for empty or unrecognized input."""
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode (HH:MM:SS,mmm)."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4808 def dfxp2srt(dfxp_data):
4810 @param dfxp_data A
bytes-like
object containing DFXP data
4811 @returns A
unicode object containing converted SRT data
4813 LEGACY_NAMESPACES = (
4814 (b'http://www.w3.org/ns/ttml', [
4815 b'http://www.w3.org/2004/11/ttaf1',
4816 b'http://www.w3.org/2006/04/ttaf1',
4817 b'http://www.w3.org/2006/10/ttaf1',
4819 (b'http://www.w3.org/ns/ttml#styling', [
4820 b'http://www.w3.org/ns/ttml#style',
4824 SUPPORTED_STYLING = [
4833 _x = functools.partial(xpath_with_ns, ns_map={
4834 'xml': 'http://www.w3.org/XML/1998/namespace',
4835 'ttml': 'http://www.w3.org/ns/ttml',
4836 'tts': 'http://www.w3.org/ns/ttml#styling',
4842 class TTMLPElementParser(object):
4844 _unclosed_elements = []
4845 _applied_styles = []
4847 def start(self, tag, attrib):
4848 if tag in (_x('ttml:br'), 'br'):
4851 unclosed_elements = []
4853 element_style_id = attrib.get('style')
4855 style.update(default_style)
4856 if element_style_id:
4857 style.update(styles.get(element_style_id, {}))
4858 for prop in SUPPORTED_STYLING:
4859 prop_val = attrib.get(_x('tts:' + prop))
4861 style[prop] = prop_val
4864 for k, v in sorted(style.items()):
4865 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4868 font += ' color="%s"' % v
4869 elif k == 'fontSize':
4870 font += ' size="%s"' % v
4871 elif k == 'fontFamily':
4872 font += ' face="%s"' % v
4873 elif k == 'fontWeight' and v == 'bold':
4875 unclosed_elements.append('b')
4876 elif k == 'fontStyle' and v == 'italic':
4878 unclosed_elements.append('i')
4879 elif k == 'textDecoration' and v == 'underline':
4881 unclosed_elements.append('u')
4883 self._out += '<font' + font + '>'
4884 unclosed_elements.append('font')
4886 if self._applied_styles:
4887 applied_style.update(self._applied_styles[-1])
4888 applied_style.update(style)
4889 self._applied_styles.append(applied_style)
4890 self._unclosed_elements.append(unclosed_elements)
4893 if tag not in (_x('ttml:br'), 'br'):
4894 unclosed_elements = self._unclosed_elements.pop()
4895 for element in reversed(unclosed_elements):
4896 self._out += '</%s>' % element
4897 if unclosed_elements and self._applied_styles:
4898 self._applied_styles.pop()
4900 def data(self, data):
4904 return self._out.strip()
4906 def parse_node(node):
4907 target = TTMLPElementParser()
4908 parser = xml.etree.ElementTree.XMLParser(target=target)
4909 parser.feed(xml.etree.ElementTree.tostring(node))
4910 return parser.close()
4912 for k, v in LEGACY_NAMESPACES:
4914 dfxp_data = dfxp_data.replace(ns, k)
4916 dfxp = compat_etree_fromstring(dfxp_data)
4918 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4921 raise ValueError('Invalid dfxp/TTML subtitle')
4925 for style in dfxp.findall(_x('.//ttml:style')):
4926 style_id = style.get('id') or style.get(_x('xml:id'))
4929 parent_style_id = style.get('style')
4931 if parent_style_id not in styles:
4934 styles[style_id] = styles[parent_style_id].copy()
4935 for prop in SUPPORTED_STYLING:
4936 prop_val = style.get(_x('tts:' + prop))
4938 styles.setdefault(style_id, {})[prop] = prop_val
4944 for p in ('body', 'div'):
4945 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4948 style = styles.get(ele.get('style'))
4951 default_style.update(style)
4953 for para, index in zip(paras, itertools.count(1)):
4954 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4955 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4956 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4957 if begin_time is None:
4962 end_time = begin_time + dur
4963 out.append('%d\n%s --> %s\n%s\n\n' % (
4965 srt_subtitles_timecode(begin_time),
4966 srt_subtitles_timecode(end_time),
def cli_option(params, command_option, param):
    """Render `command_option value` for the named param, or [] when unset."""
    param = params.get(param)
    if param:
        param = compat_str(param)
    return [command_option, param] if param is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean param as CLI arguments; with a separator the option
    and value are fused into one 'option<sep>value' token."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a bare flag when the named param equals expected_value."""
    return [command_option] if params.get(param) == expected_value else []
4994 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4995 if isinstance(argdict, (list, tuple)): # for backward compatibility
5002 assert isinstance(argdict, dict)
5004 assert isinstance(keys, (list, tuple))
5005 for key_list in keys:
5006 arg_list = list(filter(
5007 lambda x: x is not None,
5008 [argdict.get(key.lower()) for key in variadic(key_list)]))
5010 return [arg for args in arg_list for arg in args]
5014 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5015 main_key, exe = main_key.lower(), exe.lower()
5016 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5017 keys = [f'{root_key}{k}' for k in (keys or [''])]
5018 if root_key in keys:
5020 keys.append((main_key, exe))
5021 keys.append('default')
5024 return cli_configuration_args(argdict, keys, default, use_compat)
5027 class ISO639Utils(object):
5028 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5087 'iw': 'heb', # Replaced by he in 1989 revision
5097 'in': 'ind', # Replaced by id in 1989 revision
5212 'ji': 'yid', # Replaced by yi in 1989 revision
5220 def short2long(cls, code):
5221 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5222 return cls._lang_map.get(code[:2])
5225 def long2short(cls, code):
5226 """Convert language code from ISO 639-2/T to ISO 639-1"""
5227 for short_name, long_name in cls._lang_map.items():
5228 if long_name == code:
5232 class ISO3166Utils(object):
5233 # From http://data.okfn.org/data/core/country-list
5235 'AF': 'Afghanistan',
5236 'AX': 'Åland Islands',
5239 'AS': 'American Samoa',
5244 'AG': 'Antigua and Barbuda',
5261 'BO': 'Bolivia, Plurinational State of',
5262 'BQ': 'Bonaire, Sint Eustatius and Saba',
5263 'BA': 'Bosnia and Herzegovina',
5265 'BV': 'Bouvet Island',
5267 'IO': 'British Indian Ocean Territory',
5268 'BN': 'Brunei Darussalam',
5270 'BF': 'Burkina Faso',
5276 'KY': 'Cayman Islands',
5277 'CF': 'Central African Republic',
5281 'CX': 'Christmas Island',
5282 'CC': 'Cocos (Keeling) Islands',
5286 'CD': 'Congo, the Democratic Republic of the',
5287 'CK': 'Cook Islands',
5289 'CI': 'Côte d\'Ivoire',
5294 'CZ': 'Czech Republic',
5298 'DO': 'Dominican Republic',
5301 'SV': 'El Salvador',
5302 'GQ': 'Equatorial Guinea',
5306 'FK': 'Falkland Islands (Malvinas)',
5307 'FO': 'Faroe Islands',
5311 'GF': 'French Guiana',
5312 'PF': 'French Polynesia',
5313 'TF': 'French Southern Territories',
5328 'GW': 'Guinea-Bissau',
5331 'HM': 'Heard Island and McDonald Islands',
5332 'VA': 'Holy See (Vatican City State)',
5339 'IR': 'Iran, Islamic Republic of',
5342 'IM': 'Isle of Man',
5352 'KP': 'Korea, Democratic People\'s Republic of',
5353 'KR': 'Korea, Republic of',
5356 'LA': 'Lao People\'s Democratic Republic',
5362 'LI': 'Liechtenstein',
5366 'MK': 'Macedonia, the Former Yugoslav Republic of',
5373 'MH': 'Marshall Islands',
5379 'FM': 'Micronesia, Federated States of',
5380 'MD': 'Moldova, Republic of',
5391 'NL': 'Netherlands',
5392 'NC': 'New Caledonia',
5393 'NZ': 'New Zealand',
5398 'NF': 'Norfolk Island',
5399 'MP': 'Northern Mariana Islands',
5404 'PS': 'Palestine, State of',
5406 'PG': 'Papua New Guinea',
5409 'PH': 'Philippines',
5413 'PR': 'Puerto Rico',
5417 'RU': 'Russian Federation',
5419 'BL': 'Saint Barthélemy',
5420 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5421 'KN': 'Saint Kitts and Nevis',
5422 'LC': 'Saint Lucia',
5423 'MF': 'Saint Martin (French part)',
5424 'PM': 'Saint Pierre and Miquelon',
5425 'VC': 'Saint Vincent and the Grenadines',
5428 'ST': 'Sao Tome and Principe',
5429 'SA': 'Saudi Arabia',
5433 'SL': 'Sierra Leone',
5435 'SX': 'Sint Maarten (Dutch part)',
5438 'SB': 'Solomon Islands',
5440 'ZA': 'South Africa',
5441 'GS': 'South Georgia and the South Sandwich Islands',
5442 'SS': 'South Sudan',
5447 'SJ': 'Svalbard and Jan Mayen',
5450 'CH': 'Switzerland',
5451 'SY': 'Syrian Arab Republic',
5452 'TW': 'Taiwan, Province of China',
5454 'TZ': 'Tanzania, United Republic of',
5456 'TL': 'Timor-Leste',
5460 'TT': 'Trinidad and Tobago',
5463 'TM': 'Turkmenistan',
5464 'TC': 'Turks and Caicos Islands',
5468 'AE': 'United Arab Emirates',
5469 'GB': 'United Kingdom',
5470 'US': 'United States',
5471 'UM': 'United States Minor Outlying Islands',
5475 'VE': 'Venezuela, Bolivarian Republic of',
5477 'VG': 'Virgin Islands, British',
5478 'VI': 'Virgin Islands, U.S.',
5479 'WF': 'Wallis and Futuna',
5480 'EH': 'Western Sahara',
5487 def short2full(cls, code):
5488 """Convert an ISO 3166-2 country code to the corresponding full name"""
5489 return cls._country_map.get(code.upper())
5492 class GeoUtils(object):
5493 # Major IPv4 address blocks per country
5495 'AD': '46.172.224.0/19',
5496 'AE': '94.200.0.0/13',
5497 'AF': '149.54.0.0/17',
5498 'AG': '209.59.64.0/18',
5499 'AI': '204.14.248.0/21',
5500 'AL': '46.99.0.0/16',
5501 'AM': '46.70.0.0/15',
5502 'AO': '105.168.0.0/13',
5503 'AP': '182.50.184.0/21',
5504 'AQ': '23.154.160.0/24',
5505 'AR': '181.0.0.0/12',
5506 'AS': '202.70.112.0/20',
5507 'AT': '77.116.0.0/14',
5508 'AU': '1.128.0.0/11',
5509 'AW': '181.41.0.0/18',
5510 'AX': '185.217.4.0/22',
5511 'AZ': '5.197.0.0/16',
5512 'BA': '31.176.128.0/17',
5513 'BB': '65.48.128.0/17',
5514 'BD': '114.130.0.0/16',
5516 'BF': '102.178.0.0/15',
5517 'BG': '95.42.0.0/15',
5518 'BH': '37.131.0.0/17',
5519 'BI': '154.117.192.0/18',
5520 'BJ': '137.255.0.0/16',
5521 'BL': '185.212.72.0/23',
5522 'BM': '196.12.64.0/18',
5523 'BN': '156.31.0.0/16',
5524 'BO': '161.56.0.0/16',
5525 'BQ': '161.0.80.0/20',
5526 'BR': '191.128.0.0/12',
5527 'BS': '24.51.64.0/18',
5528 'BT': '119.2.96.0/19',
5529 'BW': '168.167.0.0/16',
5530 'BY': '178.120.0.0/13',
5531 'BZ': '179.42.192.0/18',
5532 'CA': '99.224.0.0/11',
5533 'CD': '41.243.0.0/16',
5534 'CF': '197.242.176.0/21',
5535 'CG': '160.113.0.0/16',
5536 'CH': '85.0.0.0/13',
5537 'CI': '102.136.0.0/14',
5538 'CK': '202.65.32.0/19',
5539 'CL': '152.172.0.0/14',
5540 'CM': '102.244.0.0/14',
5541 'CN': '36.128.0.0/10',
5542 'CO': '181.240.0.0/12',
5543 'CR': '201.192.0.0/12',
5544 'CU': '152.206.0.0/15',
5545 'CV': '165.90.96.0/19',
5546 'CW': '190.88.128.0/17',
5547 'CY': '31.153.0.0/16',
5548 'CZ': '88.100.0.0/14',
5550 'DJ': '197.241.0.0/17',
5551 'DK': '87.48.0.0/12',
5552 'DM': '192.243.48.0/20',
5553 'DO': '152.166.0.0/15',
5554 'DZ': '41.96.0.0/12',
5555 'EC': '186.68.0.0/15',
5556 'EE': '90.190.0.0/15',
5557 'EG': '156.160.0.0/11',
5558 'ER': '196.200.96.0/20',
5559 'ES': '88.0.0.0/11',
5560 'ET': '196.188.0.0/14',
5561 'EU': '2.16.0.0/13',
5562 'FI': '91.152.0.0/13',
5563 'FJ': '144.120.0.0/16',
5564 'FK': '80.73.208.0/21',
5565 'FM': '119.252.112.0/20',
5566 'FO': '88.85.32.0/19',
5568 'GA': '41.158.0.0/15',
5570 'GD': '74.122.88.0/21',
5571 'GE': '31.146.0.0/16',
5572 'GF': '161.22.64.0/18',
5573 'GG': '62.68.160.0/19',
5574 'GH': '154.160.0.0/12',
5575 'GI': '95.164.0.0/16',
5576 'GL': '88.83.0.0/19',
5577 'GM': '160.182.0.0/15',
5578 'GN': '197.149.192.0/18',
5579 'GP': '104.250.0.0/19',
5580 'GQ': '105.235.224.0/20',
5581 'GR': '94.64.0.0/13',
5582 'GT': '168.234.0.0/16',
5583 'GU': '168.123.0.0/16',
5584 'GW': '197.214.80.0/20',
5585 'GY': '181.41.64.0/18',
5586 'HK': '113.252.0.0/14',
5587 'HN': '181.210.0.0/16',
5588 'HR': '93.136.0.0/13',
5589 'HT': '148.102.128.0/17',
5590 'HU': '84.0.0.0/14',
5591 'ID': '39.192.0.0/10',
5592 'IE': '87.32.0.0/12',
5593 'IL': '79.176.0.0/13',
5594 'IM': '5.62.80.0/20',
5595 'IN': '117.192.0.0/10',
5596 'IO': '203.83.48.0/21',
5597 'IQ': '37.236.0.0/14',
5598 'IR': '2.176.0.0/12',
5599 'IS': '82.221.0.0/16',
5600 'IT': '79.0.0.0/10',
5601 'JE': '87.244.64.0/18',
5602 'JM': '72.27.0.0/17',
5603 'JO': '176.29.0.0/16',
5604 'JP': '133.0.0.0/8',
5605 'KE': '105.48.0.0/12',
5606 'KG': '158.181.128.0/17',
5607 'KH': '36.37.128.0/17',
5608 'KI': '103.25.140.0/22',
5609 'KM': '197.255.224.0/20',
5610 'KN': '198.167.192.0/19',
5611 'KP': '175.45.176.0/22',
5612 'KR': '175.192.0.0/10',
5613 'KW': '37.36.0.0/14',
5614 'KY': '64.96.0.0/15',
5615 'KZ': '2.72.0.0/13',
5616 'LA': '115.84.64.0/18',
5617 'LB': '178.135.0.0/16',
5618 'LC': '24.92.144.0/20',
5619 'LI': '82.117.0.0/19',
5620 'LK': '112.134.0.0/15',
5621 'LR': '102.183.0.0/16',
5622 'LS': '129.232.0.0/17',
5623 'LT': '78.56.0.0/13',
5624 'LU': '188.42.0.0/16',
5625 'LV': '46.109.0.0/16',
5626 'LY': '41.252.0.0/14',
5627 'MA': '105.128.0.0/11',
5628 'MC': '88.209.64.0/18',
5629 'MD': '37.246.0.0/16',
5630 'ME': '178.175.0.0/17',
5631 'MF': '74.112.232.0/21',
5632 'MG': '154.126.0.0/17',
5633 'MH': '117.103.88.0/21',
5634 'MK': '77.28.0.0/15',
5635 'ML': '154.118.128.0/18',
5636 'MM': '37.111.0.0/17',
5637 'MN': '49.0.128.0/17',
5638 'MO': '60.246.0.0/16',
5639 'MP': '202.88.64.0/20',
5640 'MQ': '109.203.224.0/19',
5641 'MR': '41.188.64.0/18',
5642 'MS': '208.90.112.0/22',
5643 'MT': '46.11.0.0/16',
5644 'MU': '105.16.0.0/12',
5645 'MV': '27.114.128.0/18',
5646 'MW': '102.70.0.0/15',
5647 'MX': '187.192.0.0/11',
5648 'MY': '175.136.0.0/13',
5649 'MZ': '197.218.0.0/15',
5650 'NA': '41.182.0.0/16',
5651 'NC': '101.101.0.0/18',
5652 'NE': '197.214.0.0/18',
5653 'NF': '203.17.240.0/22',
5654 'NG': '105.112.0.0/12',
5655 'NI': '186.76.0.0/15',
5656 'NL': '145.96.0.0/11',
5657 'NO': '84.208.0.0/13',
5658 'NP': '36.252.0.0/15',
5659 'NR': '203.98.224.0/19',
5660 'NU': '49.156.48.0/22',
5661 'NZ': '49.224.0.0/14',
5662 'OM': '5.36.0.0/15',
5663 'PA': '186.72.0.0/15',
5664 'PE': '186.160.0.0/14',
5665 'PF': '123.50.64.0/18',
5666 'PG': '124.240.192.0/19',
5667 'PH': '49.144.0.0/13',
5668 'PK': '39.32.0.0/11',
5669 'PL': '83.0.0.0/11',
5670 'PM': '70.36.0.0/20',
5671 'PR': '66.50.0.0/16',
5672 'PS': '188.161.0.0/16',
5673 'PT': '85.240.0.0/13',
5674 'PW': '202.124.224.0/20',
5675 'PY': '181.120.0.0/14',
5676 'QA': '37.210.0.0/15',
5677 'RE': '102.35.0.0/16',
5678 'RO': '79.112.0.0/13',
5679 'RS': '93.86.0.0/15',
5680 'RU': '5.136.0.0/13',
5681 'RW': '41.186.0.0/16',
5682 'SA': '188.48.0.0/13',
5683 'SB': '202.1.160.0/19',
5684 'SC': '154.192.0.0/11',
5685 'SD': '102.120.0.0/13',
5686 'SE': '78.64.0.0/12',
5687 'SG': '8.128.0.0/10',
5688 'SI': '188.196.0.0/14',
5689 'SK': '78.98.0.0/15',
5690 'SL': '102.143.0.0/17',
5691 'SM': '89.186.32.0/19',
5692 'SN': '41.82.0.0/15',
5693 'SO': '154.115.192.0/18',
5694 'SR': '186.179.128.0/17',
5695 'SS': '105.235.208.0/21',
5696 'ST': '197.159.160.0/19',
5697 'SV': '168.243.0.0/16',
5698 'SX': '190.102.0.0/20',
5700 'SZ': '41.84.224.0/19',
5701 'TC': '65.255.48.0/20',
5702 'TD': '154.68.128.0/19',
5703 'TG': '196.168.0.0/14',
5704 'TH': '171.96.0.0/13',
5705 'TJ': '85.9.128.0/18',
5706 'TK': '27.96.24.0/21',
5707 'TL': '180.189.160.0/20',
5708 'TM': '95.85.96.0/19',
5709 'TN': '197.0.0.0/11',
5710 'TO': '175.176.144.0/21',
5711 'TR': '78.160.0.0/11',
5712 'TT': '186.44.0.0/15',
5713 'TV': '202.2.96.0/19',
5714 'TW': '120.96.0.0/11',
5715 'TZ': '156.156.0.0/14',
5716 'UA': '37.52.0.0/14',
5717 'UG': '102.80.0.0/13',
5719 'UY': '167.56.0.0/13',
5720 'UZ': '84.54.64.0/18',
5721 'VA': '212.77.0.0/19',
5722 'VC': '207.191.240.0/21',
5723 'VE': '186.88.0.0/13',
5724 'VG': '66.81.192.0/20',
5725 'VI': '146.226.0.0/16',
5726 'VN': '14.160.0.0/11',
5727 'VU': '202.80.32.0/20',
5728 'WF': '117.20.32.0/21',
5729 'WS': '202.4.32.0/19',
5730 'YE': '134.35.0.0/16',
5731 'YT': '41.242.116.0/22',
5732 'ZA': '41.0.0.0/11',
5733 'ZM': '102.144.0.0/13',
5734 'ZW': '102.177.192.0/18',
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as str) drawn from a CIDR block.

        code_or_block: a two-letter country code (looked up in
        cls._country_ip_map) or an explicit 'a.b.c.d/prefixlen' string.
        Returns None when a two-letter code has no known block.
        """
        if len(code_or_block) == 2:
            # Two characters: treat as an ISO country code.
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Base address as a 32-bit big-endian integer ...
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # ... and the top of the block: set every host bit.
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request proxy override passed via
    the internal 'Ytdl-request-proxy' header (stripped before sending)."""

    def __init__(self, proxies=None):
        # Set default handlers: route http/https through proxy_open with a
        # '__noproxy__' sentinel so unconfigured schemes pass through.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy, if present, overrides the handler default.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do the actual socket wrapping
            # for SOCKS; nothing more to do here.
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387


def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a (non-negative) integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front of
    the byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Minimal big-endian length; n <= 0 degenerates to a single zero byte,
    # matching the historical struct-pack/strip-zeros implementation.
    if n <= 0:
        n, length = 0, 1
    else:
        length = (n.bit_length() + 7) // 8
    if blocksize > 0 and length % blocksize:
        length += blocksize - length % blocksize
    # int.to_bytes replaces the original 32-bit struct.pack loop.
    return n.to_bytes(length, 'big')
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to an integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes replaces the original zero-pad + 4-byte unpack loop;
    # an empty input still yields 0.
    return int.from_bytes(s, 'big')
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Reversing the bytes before hexlify interprets `data` little-endian.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return format(pow(payload, exponent, modulus), 'x')
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data     input data
    @param {int} length     target length
    @returns {int[]}        padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # PKCS#1 v1.5 (RFC 8017): EM = 0x00 || 0x02 || PS || 0x00 || M, where the
    # padding string PS must consist of NON-zero random octets. The previous
    # randint(0, 254) could emit zero bytes, prematurely terminating the
    # padding on decryption; randint(1, 255) matches the spec.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Render the non-negative integer *num* in base *n*.

    Digits are taken from *table* (default: the first *n* characters of
    0-9a-zA-Z); raises ValueError when the table is too short for the base.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Decode Dean Edwards' p.a.c.k.e.r.-style obfuscated JavaScript.

    The packed source (matched by module-level PACKED_CODES_RE) contains an
    obfuscated body plus a '|'-separated symbol list; every word token in
    the body is an index (encoded in the packer's base) into that list.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        # Key is `count` rendered in the packer's base; an empty symbol
        # entry means the token stands for itself.
        base_n_count = encode_base_n(count, base)
        symbol_table[base_n_count] = symbols[count] or base_n_count

    # Replace every word token in the body with its symbol-table entry.
    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Rotate every character of *s* that occurs in *alphabet* by *shift*
    positions (wrapping around); characters outside *alphabet* pass
    through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(rotate(ch) for ch in s)


def rot47(s):
    """Apply the ROT47 cipher (caesar shift of 47 over the 94 printable
    ASCII characters '!'..'~')."""
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted,value",...')
    into a dict, stripping the surrounding quotes from quoted values."""
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in re.findall(
            r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    }
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's '>>>'): negative values
    are first mapped to their two's-complement 32-bit representation."""
    if val < 0:
        val += 0x100000000
    return val >> n
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into (width, height, pixels), where pixels is a
    list of rows, each row a flat list of channel byte values (stride is
    width * 3, so the image is assumed 8-bit RGB, non-interlaced —
    TODO confirm with callers).
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the fixed 8-byte PNG signature and that the first chunk is IHDR.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: each chunk is length(4) + type(4) + data + CRC(4).
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is guaranteed (checked above) to be the first chunk.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split across multiple IDAT chunks; concatenate them.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 bytes per pixel)
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed byte by flat index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Every scanline is prefixed with a 1-byte filter type.
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Filter neighbours: 'left' is the same channel of the previous
            # pixel (3 bytes back), 'up' the same channel one scanline above.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: pick the neighbour closest to p.
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    """Set extended attribute *key* (with bytes *value*) on file *path*.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate Data
    Streams on Windows, then the setfattr/xattr command-line tools.
    Raises XAttrMetadataError when setting fails and XAttrUnavailableError
    when no usable implementation is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr module
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:
                # The CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
    """Return a dict holding a random birth date between 1950-01-01 and
    1995-12-31, with stringified year/month/day stored under the given
    key names."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    span_days = (last - first).days
    birthday = first + datetime.timedelta(random.randint(0, span_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
6125 # Templates for internet shortcut files, which are plain text files.
6126 DOT_URL_LINK_TEMPLATE
= '''
6131 DOT_WEBLOC_LINK_TEMPLATE
= '''
6132 <?xml version="1.0" encoding="UTF-8"?>
6133 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6134 <plist version="1.0">
6137 \t<string>%(url)s</string>
6142 DOT_DESKTOP_LINK_TEMPLATE
= '''
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """On Windows, return the absolute path prefixed with '\\\\?\\' to lift
    the MAX_PATH limitation; on other platforms return *path* unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # Work around MAX_PATH limitation on Windows. The maximum allowed
    # length for individual path segments may still be quite limited.
    # (A raw literal cannot end in a backslash, hence the rstrip trick.)
    return r'\\?\ '.rstrip() + os.path.abspath(path)
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch a value (obj itself when *field* is None, else obj[field]) and
    render it through *template*; values listed in *ignore* yield *default*
    instead. An optional *func* transforms the value before formatting."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
def clean_podcast_url(url):
    """Strip well-known podcast measurement/tracking redirect prefixes
    (chartable, blubrry, podtrac, acast, podcorn, podsights) from *url*."""
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
6231 _HEX_TABLE
= '0123456789abcdef'
6234 def random_uuidv4():
6235 return re
.sub(r
'[xy]', lambda x
: _HEX_TABLE
[random
.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """Ensure the parent directory of *path* exists.

    Returns True on success (or when there is nothing to create), False on
    failure. On failure, the error is reported through *to_screen* when it
    is callable.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # BUG FIX: this was `if callable(to_screen) is not None:`, which is
        # always true (callable() returns a bool), so passing to_screen=None
        # crashed with TypeError on the error path.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
def get_executable_path():
    """Return the absolute path of the directory the program runs from,
    accounting for PyInstaller bundles and zip imports."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):
        # PyInstaller bundle: everything lives next to the executable.
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):
        # Imported from a zip archive: step out of the package and the zip.
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
def load_plugins(name, suffix, namespace):
    """Load plugin classes from a 'ytdlp_plugins' package next to the
    executable.

    Imports module *name* from the plugin directory, copies every attribute
    whose name ends with *suffix* (and does not already exist) into
    *namespace*, and returns the list of classes added. A missing plugin
    module is silently ignored.
    """
    plugin_info = [None]
    classes = []
    try:
        # NOTE: `imp` is deprecated in favour of importlib; kept as-is here.
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        for name in dir(plugins):
            if name in namespace:
                # Never overwrite an existing (built-in) name.
                continue
            if not name.endswith(suffix):
                continue
            klass = getattr(plugins, name)
            classes.append(klass)
            namespace[name] = klass
    except ImportError:
        pass
    finally:
        # imp.find_module returns an open file object as its first element;
        # make sure it is closed even on failure.
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    '''
    if not casesense:
        # Normalize both the dict keys (below) and the path keys to lowercase.
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # Walk one path through obj; _current_depth counts '...' expansions
        # so the caller knows how many nesting levels to flatten.
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if obj is None:
                return None
            if isinstance(key, (list, tuple)):
                # Alternative keys: evaluate each and continue over the results.
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Wildcard: descend into every value of the current object.
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Dict lookup; fall back to a linear case-insensitive scan.
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # User-supplied keys may be stringified ints or 'a:b:c' slices.
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Build the final-value filter from expected_type.
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Wildcards produce nested lists: flatten down to one level,
                # dropping Nones, then apply the type filter.
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
def traverse_dict(dictn, keys, casesense=True):
    '''Deprecated alias of traverse_obj, kept for backward compatibility.
    Do not use.'''
    return traverse_obj(
        dictn, keys, casesense=casesense,
        is_user_input=True, traverse_string=True)
def variadic(x, allowed_types=(str, bytes)):
    """Return *x* itself when it is an iterable outside *allowed_types*;
    otherwise wrap it in a 1-tuple (so callers can always iterate)."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6378 def get_windows_version():
6379 ''' Get Windows version. None if it's not running on Windows '''
6380 if compat_os_name
== 'nt':
6381 return version_tuple(platform
.win32_ver()[1])